// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if !defined(__has_feature)
#define __has_feature(x) 0
#endif
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
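// MemorySanitizer cannot see the initialization performed by out-of-line
// assembly, so the assembly implementations are disabled under MSan and the
// C fallbacks are used instead.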

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.
@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)


.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif


.align	5
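@ K256 below holds the 64 SHA-256 round constants: the first 32 bits of
@ the fractional parts of the cube roots of the first 64 primes (FIPS 180-4).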
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word	OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align	5

.globl	_sha256_block_data_order
.private_extern	_sha256_block_data_order
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order
#endif
_sha256_block_data_order:
Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ _sha256_block_data_order
#else
	adr	r3,Lsha256_block_data_order
#endif
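@ Dispatch on OPENSSL_armcap_P: take the ARMv8 SHA-256 instruction path
@ (LARMv8) or the NEON path (LNEON) when the CPU advertises them;
@ otherwise fall through to the integer-only ARMv4 code below.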
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	LARMv8
	tst	r12,#ARMV7_NEON
	bne	LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
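@ Frame layout from here on: sp+0..sp+15*4 hold the 16-word schedule
@ X[], sp+16*4 the saved ctx (r0), sp+17*4 the input pointer (r1) and
@ sp+18*4 the input end pointer (r2), as pushed by the stmdb above.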
Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
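@ The "magic": Maj(a,b,c) is evaluated as ((a^b)&(b^c))^b, with the a^b
@ term reused from the round before and its addition into h deferred to
@ the next round ("h+=Maj(a,b,c) from the past"); r12 is zeroed so the
@ first round adds nothing.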
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
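@ One BODY_00_15 round, with register roles rotated one position per
@ round: h += X[i]+K256[i]+Sigma1(e)+Ch(e,f,g); d += h;
@ h += Sigma0(a)+Maj(a,b,c). Sigma1(e) is folded as
@ ror(e^ror(e,5)^ror(e,19),6) and Sigma0(a) as
@ ror(a^ror(a,11)^ror(a,20),2), saving one rotate each.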
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
Lrounds_16_xx:
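@ BODY_16_xx rounds first extend the message schedule in place,
@ X[i%16] += sigma1(X[(i+14)%16]) + X[(i+9)%16] + sigma0(X[(i+1)%16]),
@ with sigma0(x)=ror(x,7)^ror(x,18)^(x>>3) and
@ sigma1(x)=ror(x,17)^ror(x,19)^(x>>10), then apply the same round
@ function as above.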
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
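@ The check above keys on the low byte of the K256 word just loaded:
@ among K[31] (...67), K[47] (...f3) and K[63] (...f2), only the final
@ constant 0xc67178f2 matches, so the loop exits exactly after round 63
@ without a separate counter.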
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	Loop
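@ Davies-Meyer feed-forward: the eight working variables have been added
@ back into the chaining value in ctx; the loop repeats until inp
@ reaches inp+len.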

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
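@ 0xe12fff1e is the encoding of "bx lr", emitted as data so the file
@ still assembles for ARMv4, which lacks the bx instruction.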
#endif

#if __ARM_MAX_ARCH__>=7



.globl	_sha256_block_data_order_neon
.private_extern	_sha256_block_data_order_neon
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order_neon
#endif
.align	5
.skip	16
_sha256_block_data_order_neon:
LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!
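@ NEON prologue: the 16 message words are loaded, byte-reversed to
@ big-endian, pre-added to K256[0..15] and staged on the aligned stack;
@ the scalar rounds below consume them from [sp] while L_00_48 computes
@ the following schedule quarters with NEON in parallel.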
1943
1944	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1945	sub	r1,r1,#64
1946	ldr	r2,[sp,#0]
1947	eor	r12,r12,r12
1948	eor	r3,r5,r6
1949	b	L_00_48
1950
1951.align	4
1952L_00_48:
1953	vext.8	q8,q0,q1,#4
1954	add	r11,r11,r2
1955	eor	r2,r9,r10
1956	eor	r0,r8,r8,ror#5
1957	vext.8	q9,q2,q3,#4
1958	add	r4,r4,r12
1959	and	r2,r2,r8
1960	eor	r12,r0,r8,ror#19
1961	vshr.u32	q10,q8,#7
1962	eor	r0,r4,r4,ror#11
1963	eor	r2,r2,r10
1964	vadd.i32	q0,q0,q9
1965	add	r11,r11,r12,ror#6
1966	eor	r12,r4,r5
1967	vshr.u32	q9,q8,#3
1968	eor	r0,r0,r4,ror#20
1969	add	r11,r11,r2
1970	vsli.32	q10,q8,#25
1971	ldr	r2,[sp,#4]
1972	and	r3,r3,r12
1973	vshr.u32	q11,q8,#18
1974	add	r7,r7,r11
1975	add	r11,r11,r0,ror#2
1976	eor	r3,r3,r5
1977	veor	q9,q9,q10
1978	add	r10,r10,r2
1979	vsli.32	q11,q8,#14
1980	eor	r2,r8,r9
1981	eor	r0,r7,r7,ror#5
1982	vshr.u32	d24,d7,#17
1983	add	r11,r11,r3
1984	and	r2,r2,r7
1985	veor	q9,q9,q11
1986	eor	r3,r0,r7,ror#19
1987	eor	r0,r11,r11,ror#11
1988	vsli.32	d24,d7,#15
1989	eor	r2,r2,r9
1990	add	r10,r10,r3,ror#6
1991	vshr.u32	d25,d7,#10
1992	eor	r3,r11,r4
1993	eor	r0,r0,r11,ror#20
1994	vadd.i32	q0,q0,q9
1995	add	r10,r10,r2
1996	ldr	r2,[sp,#8]
1997	veor	d25,d25,d24
1998	and	r12,r12,r3
1999	add	r6,r6,r10
2000	vshr.u32	d24,d7,#19
2001	add	r10,r10,r0,ror#2
2002	eor	r12,r12,r4
2003	vsli.32	d24,d7,#13
2004	add	r9,r9,r2
2005	eor	r2,r7,r8
2006	veor	d25,d25,d24
2007	eor	r0,r6,r6,ror#5
2008	add	r10,r10,r12
2009	vadd.i32	d0,d0,d25
2010	and	r2,r2,r6
2011	eor	r12,r0,r6,ror#19
2012	vshr.u32	d24,d0,#17
2013	eor	r0,r10,r10,ror#11
2014	eor	r2,r2,r8
2015	vsli.32	d24,d0,#15
2016	add	r9,r9,r12,ror#6
2017	eor	r12,r10,r11
2018	vshr.u32	d25,d0,#10
2019	eor	r0,r0,r10,ror#20
2020	add	r9,r9,r2
2021	veor	d25,d25,d24
2022	ldr	r2,[sp,#12]
2023	and	r3,r3,r12
2024	vshr.u32	d24,d0,#19
2025	add	r5,r5,r9
2026	add	r9,r9,r0,ror#2
2027	eor	r3,r3,r11
2028	vld1.32	{q8},[r14,:128]!
2029	add	r8,r8,r2
2030	vsli.32	d24,d0,#13
2031	eor	r2,r6,r7
2032	eor	r0,r5,r5,ror#5
2033	veor	d25,d25,d24
2034	add	r9,r9,r3
2035	and	r2,r2,r5
2036	vadd.i32	d1,d1,d25
2037	eor	r3,r0,r5,ror#19
2038	eor	r0,r9,r9,ror#11
2039	vadd.i32	q8,q8,q0
2040	eor	r2,r2,r7
2041	add	r8,r8,r3,ror#6
2042	eor	r3,r9,r10
2043	eor	r0,r0,r9,ror#20
2044	add	r8,r8,r2
2045	ldr	r2,[sp,#16]
2046	and	r12,r12,r3
2047	add	r4,r4,r8
2048	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	L_00_48

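@ Exit from L_00_48: r14 just read the zero word terminating K256, so
@ message-schedule expansion for this block is finished and only the
@ final 16 rounds remain. Reload the input pointer and the
@ end-of-input mark from the frame, rewind r14 to the start of K256
@ and load the next 64-byte block. Once the last block has been
@ consumed, EQ is set and r1 is stepped back by 64 bytes so the loads
@ below stay inside the buffer (their result is never used) rather
@ than faulting past the end.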
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
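@ Final 16 rounds of the current block. The scalar pipe keeps eating
@ the K+W values staged at sp, while NEON byte-swaps the freshly
@ loaded block (vrev32.8) and precomputes K[0..15]+W[0..15] for the
@ next iteration, rewriting the staging area at r1 (= sp) in place.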
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
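@ r2 now points at the hash context (saved at sp+64). Fold in the
@ Maj(a,b,c) value carried over from the last round, then add the
@ previous intermediate hash word by word and write the new state
@ back out.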
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

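@ No flag-setting instruction has executed since "teq r1,r0" above,
@ so NE still means "more input". In that case r1 is pointed back at
@ the staged schedule, W[0]+K[0] is reloaded, the Maj accumulator in
@ r12 is cleared and r3 is reseeded with b^c before looping; in the
@ EQ case the original stack pointer is restored and we fall through
@ to the epilogue.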
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}

#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
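@ INST() hand-encodes the ARMv8 SHA-256 instructions so that the file
@ assembles even with toolchains unaware of the Cryptography
@ Extension. In ARM mode the four bytes are emitted exactly as the
@ little-endian word a|b<<8|c<<16|d<<24. In Thumb-2 mode the two
@ halfwords of a 32-bit NEON encoding swap places and the leading
@ 0xf2/0xf3 byte pattern becomes 0xfe/0xff, which is what emitting
@ "c,d|0xc,a,b" achieves.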

#ifdef __thumb2__
.thumb_func	sha256_block_data_order_armv8
#endif
.align	5
sha256_block_data_order_armv8:
LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32	@ K256
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	Loop_v8
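@ ARMv8 Cryptography Extension path (AArch32 encodings). q0:q1 hold
@ the eight state words. Each Loop_v8 iteration loads one 64-byte
@ block, byte-swaps it, then retires the 64 rounds as sixteen
@ sha256h/sha256h2 pairs, with sha256su0/sha256su1 expanding the
@ message schedule four words at a time. q14/q15 preserve the
@ incoming state for the feed-forward addition at the bottom.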

.align	4
Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
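@ NEON arithmetic does not touch the CPSR, so the flags set by this
@ teq survive down to the "it ne"/"bne Loop_v8" pair that closes the
@ loop.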
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
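@ Twelve quad-round groups above interleave schedule expansion; by
@ now all sixteen W quads exist, so the remaining four groups below
@ only add constants and run the round instructions.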
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ bx lr

#endif
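@ The .byte sequence below spells the identification string
@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by
@ <appro@openssl.org>", NUL-terminated.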
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	_OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol	_OPENSSL_armcap_P
.long	0
.private_extern	_OPENSSL_armcap_P
#endif
#endif  // !OPENSSL_NO_ASM