// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif

#if !defined(OPENSSL_NO_ASM)
#if defined(__arm__)
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles
@ per byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
#ifndef __KERNEL__
# include <GFp/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)
.arch	armv7-a

.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif

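@ K256 is the table of 64 SHA-256 round constants: the first 32 bits of
@ the fractional parts of the cube roots of the first 64 primes
@ (FIPS 180-4, section 4.2.2).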
.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

.hidden	GFp_armcap_P
.LOPENSSL_armcap:
.word	GFp_armcap_P-.Lsha256_block_data_order
#endif
.align	5

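@ GFp_sha256_block_data_order(state, in, num): r0 points at the eight
@ 32-bit state words, r1 at the input bytes, and r2 holds the number of
@ 64-byte blocks to process (r2 is turned into an end pointer below).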
.globl	GFp_sha256_block_data_order
.hidden	GFp_sha256_block_data_order
.type	GFp_sha256_block_data_order,%function
GFp_sha256_block_data_order:
.Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ GFp_sha256_block_data_order
#else
	adr	r3,.Lsha256_block_data_order
#endif
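@ Run-time dispatch on the GFp_armcap_P feature bits: use the ARMv8
@ SHA-256 instructions if available, else NEON, else fall through to
@ the integer-only ARMv4 code.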
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ GFp_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	.LARMv8
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
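@ Integer code register convention: the working variables a..h live in
@ r4..r11, r14 walks the K256 table, r0 scratches the Sigma values, and
@ the 16-word message schedule X[] occupies the stack frame just made.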
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
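@ Rounds 16..63: from here on each round also extends the message
@ schedule in place, X[i&15] += sigma1(X[(i-2)&15]) + X[(i-7)&15] +
@ sigma0(X[(i-15)&15]), reusing the sixteen stack slots as a ring.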
.Lrounds_16_xx:
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx
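@ Falling through here means the cmp at round index 31 saw 0xf2: of the
@ K256 words it tests (one per 16-round pass), only the final constant
@ 0xc67178f2 ends in that byte, so all 64 rounds are complete.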

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	GFp_sha256_block_data_order,.-GFp_sha256_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

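@ NEON path: the message schedule for four rounds at a time is computed
@ in q0-q3 with vectorised sigma0/sigma1 (vshr/vsli/veor), interleaved
@ with the scalar round function to keep both pipelines busy.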
.type	sha256_block_data_order_neon,%function
.align	5
.skip	16
sha256_block_data_order_neon:
.LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	.L_00_48

.align	4
.L_00_48:
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
2274	eor	r3,r0,r11,ror#19
2275	eor	r0,r7,r7,ror#11
2276	vsli.32	d24,d5,#15
2277	eor	r2,r2,r5
2278	add	r6,r6,r3,ror#6
2279	vshr.u32	d25,d5,#10
2280	eor	r3,r7,r8
2281	eor	r0,r0,r7,ror#20
2282	vadd.i32	q3,q3,q9
2283	add	r6,r6,r2
2284	ldr	r2,[sp,#56]
2285	veor	d25,d25,d24
2286	and	r12,r12,r3
2287	add	r10,r10,r6
2288	vshr.u32	d24,d5,#19
2289	add	r6,r6,r0,ror#2
2290	eor	r12,r12,r8
2291	vsli.32	d24,d5,#13
2292	add	r5,r5,r2
2293	eor	r2,r11,r4
2294	veor	d25,d25,d24
2295	eor	r0,r10,r10,ror#5
2296	add	r6,r6,r12
2297	vadd.i32	d6,d6,d25
2298	and	r2,r2,r10
2299	eor	r12,r0,r10,ror#19
2300	vshr.u32	d24,d6,#17
2301	eor	r0,r6,r6,ror#11
2302	eor	r2,r2,r4
2303	vsli.32	d24,d6,#15
2304	add	r5,r5,r12,ror#6
2305	eor	r12,r6,r7
2306	vshr.u32	d25,d6,#10
2307	eor	r0,r0,r6,ror#20
2308	add	r5,r5,r2
2309	veor	d25,d25,d24
2310	ldr	r2,[sp,#60]
2311	and	r3,r3,r12
2312	vshr.u32	d24,d6,#19
2313	add	r9,r9,r5
2314	add	r5,r5,r0,ror#2
2315	eor	r3,r3,r7
2316	vld1.32	{q8},[r14,:128]!
2317	add	r4,r4,r2
2318	vsli.32	d24,d6,#13
2319	eor	r2,r10,r11
2320	eor	r0,r9,r9,ror#5
2321	veor	d25,d25,d24
2322	add	r5,r5,r3
2323	and	r2,r2,r9
2324	vadd.i32	d7,d7,d25
2325	eor	r3,r0,r9,ror#19
2326	eor	r0,r5,r5,ror#11
2327	vadd.i32	q8,q8,q3
2328	eor	r2,r2,r11
2329	add	r4,r4,r3,ror#6
2330	eor	r3,r5,r6
2331	eor	r0,r0,r5,ror#20
2332	add	r4,r4,r2
2333	ldr	r2,[r14]
2334	and	r12,r12,r3
2335	add	r8,r8,r4
2336	vst1.32	{q8},[r1,:128]!
2337	add	r4,r4,r0,ror#2
2338	eor	r12,r12,r6
2339	teq	r2,#0				@ check for K256 terminator
2340	ldr	r2,[sp,#0]
2341	sub	r1,r1,#64
2342	bne	.L_00_48
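
@ End of the .L_00_48 loop. Each iteration above interleaves integer
@ SHA-256 rounds with the NEON message-schedule update for the next
@ four words. NEON has no 32-bit rotate, so each rotation is
@ synthesized from a vshr.u32/vsli.32 pair (shift right by n, then
@ shift-left-insert by 32-n); that is how sigma0(x) =
@ ROTR(x,7)^ROTR(x,18)^(x>>3) and sigma1(x) =
@ ROTR(x,17)^ROTR(x,19)^(x>>10) are built above. The loop exits once
@ the zero word placed after K256 is fetched, i.e. after all 64 round
@ constants have been consumed.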

	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
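
@ The final 16 rounds below consume the last scheduled words from the
@ stack while the freshly loaded block is byte-reversed (vrev32.8) and
@ pre-added to the K constants for the next iteration. If the block
@ just finished was the last one (r1 reached the end-of-input value
@ kept at [sp,#72]), r1 was stepped back by 64 beforehand so the loads
@ above cannot run past the caller's buffer; their result is then
@ simply discarded, and the saved input pointer at [sp,#68] is only
@ advanced in the more-data (ne) case.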
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}
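
@ Output update: r2 holds the hash context (reloaded from [sp,#64]
@ above). Each of the eight state words is accumulated with its working
@ variable, a..d stored back with post-increment and e..h with a single
@ stmia, i.e. H[i] += {a,...,h}[i] at the end of every block.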

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48
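
@ The ittte/itt blocks exist for Thumb-2, where conditionally executed
@ instructions must be introduced by an IT instruction. They predicate
@ the more-blocks-remain (ne) path, which resets r1 to the stacked
@ schedule, reloads the first word and clears the running Maj
@ accumulator in r12, against the (eq) path that restores the caller's
@ sp before falling through to the return below.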

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
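
@ INST() hand-assembles the ARMv8 SHA-256 instructions so the file
@ still builds with assemblers that do not know the crypto extensions.
@ The four arguments are the little-endian ARM encoding; in Thumb-2
@ mode the two halfwords are swapped and the leading byte adjusted
@ (d|0xc turns the 0xf3 prefix into 0xff), which appears to be the
@ corresponding Thumb encoding of the same NEON-space instruction.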

.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8
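
@ ARMv8 code path: the whole state lives in q0/q1 and each
@ sha256h/sha256h2 pair retires four rounds, with sha256su0/sha256su1
@ performing the message-schedule update for the first 48 rounds.
@ r3 walks the K256 table (rewound at the end of each block), r1 the
@ input, and r2 marks the end of the input computed in the prologue.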

.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
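
@ Final four quad-rounds: the full schedule is now in q8-q11, so only
@ the constant loads and the h/h2 steps remain before the saved state
@ in q14/q15 is added back in.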
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
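
@ The .byte sequence below is the usual CRYPTOGAMS identification
@ string; it decodes to "SHA256 block transform for ARMv4/NEON/ARMv8,
@ CRYPTOGAMS by <appro@openssl.org>" plus a terminating NUL.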
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
#endif
#endif  // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits
