1@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
2@
3@ Licensed under the OpenSSL license (the "License").  You may not use
4@ this file except in compliance with the License.  You can obtain a copy
5@ in the file LICENSE in the source distribution or at
6@ https://www.openssl.org/source/license.html
7
8
9@ ====================================================================
10@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
11@ project. The module is, however, dual licensed under OpenSSL and
12@ CRYPTOGAMS licenses depending on where you obtain it. For further
13@ details see http://www.openssl.org/~appro/cryptogams/.
14@
15@ Permission to use under GPL terms is granted.
16@ ====================================================================
17
18@ SHA256 block procedure for ARMv4. May 2007.
19
20@ Performance is ~2x better than gcc 3.4 generated code and in "abso-
21@ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
22@ byte [on single-issue Xscale PXA250 core].
23
24@ July 2010.
25@
26@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
27@ Cortex A8 core and ~20 cycles per processed byte.
28
29@ February 2011.
30@
31@ Profiler-assisted and platform-specific optimization resulted in 16%
32@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
33
34@ September 2013.
35@
36@ Add NEON implementation. On Cortex A8 it was measured to process one
37@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
38@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
39@ code (meaning that latter performs sub-optimally, nothing was done
40@ about it).
41
42@ May 2014.
43@
44@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
45
46#ifndef __KERNEL__
47# include <openssl/arm_arch.h>
48#else
49# define __ARM_ARCH__ __LINUX_ARM_ARCH__
50# define __ARM_MAX_ARCH__ 7
51#endif
52
53@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
54@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
55@ instructions are manually-encoded. (See unsha256.)
56
57
58.text
59#if defined(__thumb2__)
60.syntax	unified
61.thumb
62#else
63.code	32
64#endif
65
66
.align	5
@ K256: the 64 SHA-256 round constants K[0..63] (FIPS 180-4: the first 32 bits
@ of the fractional parts of the cube roots of the first 64 primes). The
@ integer code walks this table with a post-incremented pointer (r14) while
@ processing rounds; note the dispatch code locates it PC-relatively via
@ "sub r14,r3,#256+32" (L119), so this table must stay immediately before
@ Lsha256_block_data_order at its current distance.
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

@ Zero word marking the end of the table. NOTE(review): the round loop's
@ end-of-schedule test visible in this chunk compares the low byte of the
@ fetched constant against 0xf2 (the last K value, 0xc67178f2), not against
@ this zero — presumably the terminator serves the generator's layout /
@ other code paths; confirm against the perlasm source.
.word	0				@ terminator
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ PC-relative offset from Lsha256_block_data_order to the OPENSSL_armcap_P
@ capability word; resolved at runtime (L106-L107) to decide whether to
@ branch to the NEON (LNEON) or ARMv8 crypto-extension (LARMv8) code paths.
@ Only emitted for userspace builds on ARMv7+ targets.
LOPENSSL_armcap:
.word	OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align	5
92
93.globl	_sha256_block_data_order
94.private_extern	_sha256_block_data_order
95#ifdef __thumb2__
96.thumb_func	_sha256_block_data_order
97#endif
98_sha256_block_data_order:
99Lsha256_block_data_order:
100#if __ARM_ARCH__<7 && !defined(__thumb2__)
101	sub	r3,pc,#8		@ _sha256_block_data_order
102#else
103	adr	r3,Lsha256_block_data_order
104#endif
105#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
106	ldr	r12,LOPENSSL_armcap
107	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
108#ifdef	__APPLE__
109	ldr	r12,[r12]
110#endif
111	tst	r12,#ARMV8_SHA256
112	bne	LARMv8
113	tst	r12,#ARMV7_NEON
114	bne	LNEON
115#endif
116	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
117	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
118	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
119	sub	r14,r3,#256+32	@ K256
120	sub	sp,sp,#16*4		@ alloca(X[16])
121Loop:
122# if __ARM_ARCH__>=7
123	ldr	r2,[r1],#4
124# else
125	ldrb	r2,[r1,#3]
126# endif
127	eor	r3,r5,r6		@ magic
128	eor	r12,r12,r12
129#if __ARM_ARCH__>=7
130	@ ldr	r2,[r1],#4			@ 0
131# if 0==15
132	str	r1,[sp,#17*4]			@ make room for r1
133# endif
134	eor	r0,r8,r8,ror#5
135	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
136	eor	r0,r0,r8,ror#19	@ Sigma1(e)
137# ifndef __ARMEB__
138	rev	r2,r2
139# endif
140#else
141	@ ldrb	r2,[r1,#3]			@ 0
142	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
143	ldrb	r12,[r1,#2]
144	ldrb	r0,[r1,#1]
145	orr	r2,r2,r12,lsl#8
146	ldrb	r12,[r1],#4
147	orr	r2,r2,r0,lsl#16
148# if 0==15
149	str	r1,[sp,#17*4]			@ make room for r1
150# endif
151	eor	r0,r8,r8,ror#5
152	orr	r2,r2,r12,lsl#24
153	eor	r0,r0,r8,ror#19	@ Sigma1(e)
154#endif
155	ldr	r12,[r14],#4			@ *K256++
156	add	r11,r11,r2			@ h+=X[i]
157	str	r2,[sp,#0*4]
158	eor	r2,r9,r10
159	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
160	and	r2,r2,r8
161	add	r11,r11,r12			@ h+=K256[i]
162	eor	r2,r2,r10			@ Ch(e,f,g)
163	eor	r0,r4,r4,ror#11
164	add	r11,r11,r2			@ h+=Ch(e,f,g)
165#if 0==31
166	and	r12,r12,#0xff
167	cmp	r12,#0xf2			@ done?
168#endif
169#if 0<15
170# if __ARM_ARCH__>=7
171	ldr	r2,[r1],#4			@ prefetch
172# else
173	ldrb	r2,[r1,#3]
174# endif
175	eor	r12,r4,r5			@ a^b, b^c in next round
176#else
177	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
178	eor	r12,r4,r5			@ a^b, b^c in next round
179	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
180#endif
181	eor	r0,r0,r4,ror#20	@ Sigma0(a)
182	and	r3,r3,r12			@ (b^c)&=(a^b)
183	add	r7,r7,r11			@ d+=h
184	eor	r3,r3,r5			@ Maj(a,b,c)
185	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
186	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
187#if __ARM_ARCH__>=7
188	@ ldr	r2,[r1],#4			@ 1
189# if 1==15
190	str	r1,[sp,#17*4]			@ make room for r1
191# endif
192	eor	r0,r7,r7,ror#5
193	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
194	eor	r0,r0,r7,ror#19	@ Sigma1(e)
195# ifndef __ARMEB__
196	rev	r2,r2
197# endif
198#else
199	@ ldrb	r2,[r1,#3]			@ 1
200	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
201	ldrb	r3,[r1,#2]
202	ldrb	r0,[r1,#1]
203	orr	r2,r2,r3,lsl#8
204	ldrb	r3,[r1],#4
205	orr	r2,r2,r0,lsl#16
206# if 1==15
207	str	r1,[sp,#17*4]			@ make room for r1
208# endif
209	eor	r0,r7,r7,ror#5
210	orr	r2,r2,r3,lsl#24
211	eor	r0,r0,r7,ror#19	@ Sigma1(e)
212#endif
213	ldr	r3,[r14],#4			@ *K256++
214	add	r10,r10,r2			@ h+=X[i]
215	str	r2,[sp,#1*4]
216	eor	r2,r8,r9
217	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
218	and	r2,r2,r7
219	add	r10,r10,r3			@ h+=K256[i]
220	eor	r2,r2,r9			@ Ch(e,f,g)
221	eor	r0,r11,r11,ror#11
222	add	r10,r10,r2			@ h+=Ch(e,f,g)
223#if 1==31
224	and	r3,r3,#0xff
225	cmp	r3,#0xf2			@ done?
226#endif
227#if 1<15
228# if __ARM_ARCH__>=7
229	ldr	r2,[r1],#4			@ prefetch
230# else
231	ldrb	r2,[r1,#3]
232# endif
233	eor	r3,r11,r4			@ a^b, b^c in next round
234#else
235	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
236	eor	r3,r11,r4			@ a^b, b^c in next round
237	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
238#endif
239	eor	r0,r0,r11,ror#20	@ Sigma0(a)
240	and	r12,r12,r3			@ (b^c)&=(a^b)
241	add	r6,r6,r10			@ d+=h
242	eor	r12,r12,r4			@ Maj(a,b,c)
243	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
244	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
245#if __ARM_ARCH__>=7
246	@ ldr	r2,[r1],#4			@ 2
247# if 2==15
248	str	r1,[sp,#17*4]			@ make room for r1
249# endif
250	eor	r0,r6,r6,ror#5
251	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
252	eor	r0,r0,r6,ror#19	@ Sigma1(e)
253# ifndef __ARMEB__
254	rev	r2,r2
255# endif
256#else
257	@ ldrb	r2,[r1,#3]			@ 2
258	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
259	ldrb	r12,[r1,#2]
260	ldrb	r0,[r1,#1]
261	orr	r2,r2,r12,lsl#8
262	ldrb	r12,[r1],#4
263	orr	r2,r2,r0,lsl#16
264# if 2==15
265	str	r1,[sp,#17*4]			@ make room for r1
266# endif
267	eor	r0,r6,r6,ror#5
268	orr	r2,r2,r12,lsl#24
269	eor	r0,r0,r6,ror#19	@ Sigma1(e)
270#endif
271	ldr	r12,[r14],#4			@ *K256++
272	add	r9,r9,r2			@ h+=X[i]
273	str	r2,[sp,#2*4]
274	eor	r2,r7,r8
275	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
276	and	r2,r2,r6
277	add	r9,r9,r12			@ h+=K256[i]
278	eor	r2,r2,r8			@ Ch(e,f,g)
279	eor	r0,r10,r10,ror#11
280	add	r9,r9,r2			@ h+=Ch(e,f,g)
281#if 2==31
282	and	r12,r12,#0xff
283	cmp	r12,#0xf2			@ done?
284#endif
285#if 2<15
286# if __ARM_ARCH__>=7
287	ldr	r2,[r1],#4			@ prefetch
288# else
289	ldrb	r2,[r1,#3]
290# endif
291	eor	r12,r10,r11			@ a^b, b^c in next round
292#else
293	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
294	eor	r12,r10,r11			@ a^b, b^c in next round
295	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
296#endif
297	eor	r0,r0,r10,ror#20	@ Sigma0(a)
298	and	r3,r3,r12			@ (b^c)&=(a^b)
299	add	r5,r5,r9			@ d+=h
300	eor	r3,r3,r11			@ Maj(a,b,c)
301	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
302	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
303#if __ARM_ARCH__>=7
304	@ ldr	r2,[r1],#4			@ 3
305# if 3==15
306	str	r1,[sp,#17*4]			@ make room for r1
307# endif
308	eor	r0,r5,r5,ror#5
309	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
310	eor	r0,r0,r5,ror#19	@ Sigma1(e)
311# ifndef __ARMEB__
312	rev	r2,r2
313# endif
314#else
315	@ ldrb	r2,[r1,#3]			@ 3
316	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
317	ldrb	r3,[r1,#2]
318	ldrb	r0,[r1,#1]
319	orr	r2,r2,r3,lsl#8
320	ldrb	r3,[r1],#4
321	orr	r2,r2,r0,lsl#16
322# if 3==15
323	str	r1,[sp,#17*4]			@ make room for r1
324# endif
325	eor	r0,r5,r5,ror#5
326	orr	r2,r2,r3,lsl#24
327	eor	r0,r0,r5,ror#19	@ Sigma1(e)
328#endif
329	ldr	r3,[r14],#4			@ *K256++
330	add	r8,r8,r2			@ h+=X[i]
331	str	r2,[sp,#3*4]
332	eor	r2,r6,r7
333	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
334	and	r2,r2,r5
335	add	r8,r8,r3			@ h+=K256[i]
336	eor	r2,r2,r7			@ Ch(e,f,g)
337	eor	r0,r9,r9,ror#11
338	add	r8,r8,r2			@ h+=Ch(e,f,g)
339#if 3==31
340	and	r3,r3,#0xff
341	cmp	r3,#0xf2			@ done?
342#endif
343#if 3<15
344# if __ARM_ARCH__>=7
345	ldr	r2,[r1],#4			@ prefetch
346# else
347	ldrb	r2,[r1,#3]
348# endif
349	eor	r3,r9,r10			@ a^b, b^c in next round
350#else
351	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
352	eor	r3,r9,r10			@ a^b, b^c in next round
353	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
354#endif
355	eor	r0,r0,r9,ror#20	@ Sigma0(a)
356	and	r12,r12,r3			@ (b^c)&=(a^b)
357	add	r4,r4,r8			@ d+=h
358	eor	r12,r12,r10			@ Maj(a,b,c)
359	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
360	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
361#if __ARM_ARCH__>=7
362	@ ldr	r2,[r1],#4			@ 4
363# if 4==15
364	str	r1,[sp,#17*4]			@ make room for r1
365# endif
366	eor	r0,r4,r4,ror#5
367	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
368	eor	r0,r0,r4,ror#19	@ Sigma1(e)
369# ifndef __ARMEB__
370	rev	r2,r2
371# endif
372#else
373	@ ldrb	r2,[r1,#3]			@ 4
374	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
375	ldrb	r12,[r1,#2]
376	ldrb	r0,[r1,#1]
377	orr	r2,r2,r12,lsl#8
378	ldrb	r12,[r1],#4
379	orr	r2,r2,r0,lsl#16
380# if 4==15
381	str	r1,[sp,#17*4]			@ make room for r1
382# endif
383	eor	r0,r4,r4,ror#5
384	orr	r2,r2,r12,lsl#24
385	eor	r0,r0,r4,ror#19	@ Sigma1(e)
386#endif
387	ldr	r12,[r14],#4			@ *K256++
388	add	r7,r7,r2			@ h+=X[i]
389	str	r2,[sp,#4*4]
390	eor	r2,r5,r6
391	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
392	and	r2,r2,r4
393	add	r7,r7,r12			@ h+=K256[i]
394	eor	r2,r2,r6			@ Ch(e,f,g)
395	eor	r0,r8,r8,ror#11
396	add	r7,r7,r2			@ h+=Ch(e,f,g)
397#if 4==31
398	and	r12,r12,#0xff
399	cmp	r12,#0xf2			@ done?
400#endif
401#if 4<15
402# if __ARM_ARCH__>=7
403	ldr	r2,[r1],#4			@ prefetch
404# else
405	ldrb	r2,[r1,#3]
406# endif
407	eor	r12,r8,r9			@ a^b, b^c in next round
408#else
409	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
410	eor	r12,r8,r9			@ a^b, b^c in next round
411	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
412#endif
413	eor	r0,r0,r8,ror#20	@ Sigma0(a)
414	and	r3,r3,r12			@ (b^c)&=(a^b)
415	add	r11,r11,r7			@ d+=h
416	eor	r3,r3,r9			@ Maj(a,b,c)
417	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
418	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
419#if __ARM_ARCH__>=7
420	@ ldr	r2,[r1],#4			@ 5
421# if 5==15
422	str	r1,[sp,#17*4]			@ make room for r1
423# endif
424	eor	r0,r11,r11,ror#5
425	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
426	eor	r0,r0,r11,ror#19	@ Sigma1(e)
427# ifndef __ARMEB__
428	rev	r2,r2
429# endif
430#else
431	@ ldrb	r2,[r1,#3]			@ 5
432	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
433	ldrb	r3,[r1,#2]
434	ldrb	r0,[r1,#1]
435	orr	r2,r2,r3,lsl#8
436	ldrb	r3,[r1],#4
437	orr	r2,r2,r0,lsl#16
438# if 5==15
439	str	r1,[sp,#17*4]			@ make room for r1
440# endif
441	eor	r0,r11,r11,ror#5
442	orr	r2,r2,r3,lsl#24
443	eor	r0,r0,r11,ror#19	@ Sigma1(e)
444#endif
445	ldr	r3,[r14],#4			@ *K256++
446	add	r6,r6,r2			@ h+=X[i]
447	str	r2,[sp,#5*4]
448	eor	r2,r4,r5
449	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
450	and	r2,r2,r11
451	add	r6,r6,r3			@ h+=K256[i]
452	eor	r2,r2,r5			@ Ch(e,f,g)
453	eor	r0,r7,r7,ror#11
454	add	r6,r6,r2			@ h+=Ch(e,f,g)
455#if 5==31
456	and	r3,r3,#0xff
457	cmp	r3,#0xf2			@ done?
458#endif
459#if 5<15
460# if __ARM_ARCH__>=7
461	ldr	r2,[r1],#4			@ prefetch
462# else
463	ldrb	r2,[r1,#3]
464# endif
465	eor	r3,r7,r8			@ a^b, b^c in next round
466#else
467	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
468	eor	r3,r7,r8			@ a^b, b^c in next round
469	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
470#endif
471	eor	r0,r0,r7,ror#20	@ Sigma0(a)
472	and	r12,r12,r3			@ (b^c)&=(a^b)
473	add	r10,r10,r6			@ d+=h
474	eor	r12,r12,r8			@ Maj(a,b,c)
475	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
476	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
477#if __ARM_ARCH__>=7
478	@ ldr	r2,[r1],#4			@ 6
479# if 6==15
480	str	r1,[sp,#17*4]			@ make room for r1
481# endif
482	eor	r0,r10,r10,ror#5
483	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
484	eor	r0,r0,r10,ror#19	@ Sigma1(e)
485# ifndef __ARMEB__
486	rev	r2,r2
487# endif
488#else
489	@ ldrb	r2,[r1,#3]			@ 6
490	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
491	ldrb	r12,[r1,#2]
492	ldrb	r0,[r1,#1]
493	orr	r2,r2,r12,lsl#8
494	ldrb	r12,[r1],#4
495	orr	r2,r2,r0,lsl#16
496# if 6==15
497	str	r1,[sp,#17*4]			@ make room for r1
498# endif
499	eor	r0,r10,r10,ror#5
500	orr	r2,r2,r12,lsl#24
501	eor	r0,r0,r10,ror#19	@ Sigma1(e)
502#endif
503	ldr	r12,[r14],#4			@ *K256++
504	add	r5,r5,r2			@ h+=X[i]
505	str	r2,[sp,#6*4]
506	eor	r2,r11,r4
507	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
508	and	r2,r2,r10
509	add	r5,r5,r12			@ h+=K256[i]
510	eor	r2,r2,r4			@ Ch(e,f,g)
511	eor	r0,r6,r6,ror#11
512	add	r5,r5,r2			@ h+=Ch(e,f,g)
513#if 6==31
514	and	r12,r12,#0xff
515	cmp	r12,#0xf2			@ done?
516#endif
517#if 6<15
518# if __ARM_ARCH__>=7
519	ldr	r2,[r1],#4			@ prefetch
520# else
521	ldrb	r2,[r1,#3]
522# endif
523	eor	r12,r6,r7			@ a^b, b^c in next round
524#else
525	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
526	eor	r12,r6,r7			@ a^b, b^c in next round
527	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
528#endif
529	eor	r0,r0,r6,ror#20	@ Sigma0(a)
530	and	r3,r3,r12			@ (b^c)&=(a^b)
531	add	r9,r9,r5			@ d+=h
532	eor	r3,r3,r7			@ Maj(a,b,c)
533	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
534	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
535#if __ARM_ARCH__>=7
536	@ ldr	r2,[r1],#4			@ 7
537# if 7==15
538	str	r1,[sp,#17*4]			@ make room for r1
539# endif
540	eor	r0,r9,r9,ror#5
541	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
542	eor	r0,r0,r9,ror#19	@ Sigma1(e)
543# ifndef __ARMEB__
544	rev	r2,r2
545# endif
546#else
547	@ ldrb	r2,[r1,#3]			@ 7
548	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
549	ldrb	r3,[r1,#2]
550	ldrb	r0,[r1,#1]
551	orr	r2,r2,r3,lsl#8
552	ldrb	r3,[r1],#4
553	orr	r2,r2,r0,lsl#16
554# if 7==15
555	str	r1,[sp,#17*4]			@ make room for r1
556# endif
557	eor	r0,r9,r9,ror#5
558	orr	r2,r2,r3,lsl#24
559	eor	r0,r0,r9,ror#19	@ Sigma1(e)
560#endif
561	ldr	r3,[r14],#4			@ *K256++
562	add	r4,r4,r2			@ h+=X[i]
563	str	r2,[sp,#7*4]
564	eor	r2,r10,r11
565	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
566	and	r2,r2,r9
567	add	r4,r4,r3			@ h+=K256[i]
568	eor	r2,r2,r11			@ Ch(e,f,g)
569	eor	r0,r5,r5,ror#11
570	add	r4,r4,r2			@ h+=Ch(e,f,g)
571#if 7==31
572	and	r3,r3,#0xff
573	cmp	r3,#0xf2			@ done?
574#endif
575#if 7<15
576# if __ARM_ARCH__>=7
577	ldr	r2,[r1],#4			@ prefetch
578# else
579	ldrb	r2,[r1,#3]
580# endif
581	eor	r3,r5,r6			@ a^b, b^c in next round
582#else
583	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
584	eor	r3,r5,r6			@ a^b, b^c in next round
585	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
586#endif
587	eor	r0,r0,r5,ror#20	@ Sigma0(a)
588	and	r12,r12,r3			@ (b^c)&=(a^b)
589	add	r8,r8,r4			@ d+=h
590	eor	r12,r12,r6			@ Maj(a,b,c)
591	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
592	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
593#if __ARM_ARCH__>=7
594	@ ldr	r2,[r1],#4			@ 8
595# if 8==15
596	str	r1,[sp,#17*4]			@ make room for r1
597# endif
598	eor	r0,r8,r8,ror#5
599	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
600	eor	r0,r0,r8,ror#19	@ Sigma1(e)
601# ifndef __ARMEB__
602	rev	r2,r2
603# endif
604#else
605	@ ldrb	r2,[r1,#3]			@ 8
606	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
607	ldrb	r12,[r1,#2]
608	ldrb	r0,[r1,#1]
609	orr	r2,r2,r12,lsl#8
610	ldrb	r12,[r1],#4
611	orr	r2,r2,r0,lsl#16
612# if 8==15
613	str	r1,[sp,#17*4]			@ make room for r1
614# endif
615	eor	r0,r8,r8,ror#5
616	orr	r2,r2,r12,lsl#24
617	eor	r0,r0,r8,ror#19	@ Sigma1(e)
618#endif
619	ldr	r12,[r14],#4			@ *K256++
620	add	r11,r11,r2			@ h+=X[i]
621	str	r2,[sp,#8*4]
622	eor	r2,r9,r10
623	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
624	and	r2,r2,r8
625	add	r11,r11,r12			@ h+=K256[i]
626	eor	r2,r2,r10			@ Ch(e,f,g)
627	eor	r0,r4,r4,ror#11
628	add	r11,r11,r2			@ h+=Ch(e,f,g)
629#if 8==31
630	and	r12,r12,#0xff
631	cmp	r12,#0xf2			@ done?
632#endif
633#if 8<15
634# if __ARM_ARCH__>=7
635	ldr	r2,[r1],#4			@ prefetch
636# else
637	ldrb	r2,[r1,#3]
638# endif
639	eor	r12,r4,r5			@ a^b, b^c in next round
640#else
641	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
642	eor	r12,r4,r5			@ a^b, b^c in next round
643	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
644#endif
645	eor	r0,r0,r4,ror#20	@ Sigma0(a)
646	and	r3,r3,r12			@ (b^c)&=(a^b)
647	add	r7,r7,r11			@ d+=h
648	eor	r3,r3,r5			@ Maj(a,b,c)
649	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
650	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
651#if __ARM_ARCH__>=7
652	@ ldr	r2,[r1],#4			@ 9
653# if 9==15
654	str	r1,[sp,#17*4]			@ make room for r1
655# endif
656	eor	r0,r7,r7,ror#5
657	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
658	eor	r0,r0,r7,ror#19	@ Sigma1(e)
659# ifndef __ARMEB__
660	rev	r2,r2
661# endif
662#else
663	@ ldrb	r2,[r1,#3]			@ 9
664	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
665	ldrb	r3,[r1,#2]
666	ldrb	r0,[r1,#1]
667	orr	r2,r2,r3,lsl#8
668	ldrb	r3,[r1],#4
669	orr	r2,r2,r0,lsl#16
670# if 9==15
671	str	r1,[sp,#17*4]			@ make room for r1
672# endif
673	eor	r0,r7,r7,ror#5
674	orr	r2,r2,r3,lsl#24
675	eor	r0,r0,r7,ror#19	@ Sigma1(e)
676#endif
677	ldr	r3,[r14],#4			@ *K256++
678	add	r10,r10,r2			@ h+=X[i]
679	str	r2,[sp,#9*4]
680	eor	r2,r8,r9
681	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
682	and	r2,r2,r7
683	add	r10,r10,r3			@ h+=K256[i]
684	eor	r2,r2,r9			@ Ch(e,f,g)
685	eor	r0,r11,r11,ror#11
686	add	r10,r10,r2			@ h+=Ch(e,f,g)
687#if 9==31
688	and	r3,r3,#0xff
689	cmp	r3,#0xf2			@ done?
690#endif
691#if 9<15
692# if __ARM_ARCH__>=7
693	ldr	r2,[r1],#4			@ prefetch
694# else
695	ldrb	r2,[r1,#3]
696# endif
697	eor	r3,r11,r4			@ a^b, b^c in next round
698#else
699	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
700	eor	r3,r11,r4			@ a^b, b^c in next round
701	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
702#endif
703	eor	r0,r0,r11,ror#20	@ Sigma0(a)
704	and	r12,r12,r3			@ (b^c)&=(a^b)
705	add	r6,r6,r10			@ d+=h
706	eor	r12,r12,r4			@ Maj(a,b,c)
707	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
708	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
709#if __ARM_ARCH__>=7
710	@ ldr	r2,[r1],#4			@ 10
711# if 10==15
712	str	r1,[sp,#17*4]			@ make room for r1
713# endif
714	eor	r0,r6,r6,ror#5
715	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
716	eor	r0,r0,r6,ror#19	@ Sigma1(e)
717# ifndef __ARMEB__
718	rev	r2,r2
719# endif
720#else
721	@ ldrb	r2,[r1,#3]			@ 10
722	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
723	ldrb	r12,[r1,#2]
724	ldrb	r0,[r1,#1]
725	orr	r2,r2,r12,lsl#8
726	ldrb	r12,[r1],#4
727	orr	r2,r2,r0,lsl#16
728# if 10==15
729	str	r1,[sp,#17*4]			@ make room for r1
730# endif
731	eor	r0,r6,r6,ror#5
732	orr	r2,r2,r12,lsl#24
733	eor	r0,r0,r6,ror#19	@ Sigma1(e)
734#endif
735	ldr	r12,[r14],#4			@ *K256++
736	add	r9,r9,r2			@ h+=X[i]
737	str	r2,[sp,#10*4]
738	eor	r2,r7,r8
739	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
740	and	r2,r2,r6
741	add	r9,r9,r12			@ h+=K256[i]
742	eor	r2,r2,r8			@ Ch(e,f,g)
743	eor	r0,r10,r10,ror#11
744	add	r9,r9,r2			@ h+=Ch(e,f,g)
745#if 10==31
746	and	r12,r12,#0xff
747	cmp	r12,#0xf2			@ done?
748#endif
749#if 10<15
750# if __ARM_ARCH__>=7
751	ldr	r2,[r1],#4			@ prefetch
752# else
753	ldrb	r2,[r1,#3]
754# endif
755	eor	r12,r10,r11			@ a^b, b^c in next round
756#else
757	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
758	eor	r12,r10,r11			@ a^b, b^c in next round
759	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
760#endif
761	eor	r0,r0,r10,ror#20	@ Sigma0(a)
762	and	r3,r3,r12			@ (b^c)&=(a^b)
763	add	r5,r5,r9			@ d+=h
764	eor	r3,r3,r11			@ Maj(a,b,c)
765	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
766	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
767#if __ARM_ARCH__>=7
768	@ ldr	r2,[r1],#4			@ 11
769# if 11==15
770	str	r1,[sp,#17*4]			@ make room for r1
771# endif
772	eor	r0,r5,r5,ror#5
773	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
774	eor	r0,r0,r5,ror#19	@ Sigma1(e)
775# ifndef __ARMEB__
776	rev	r2,r2
777# endif
778#else
779	@ ldrb	r2,[r1,#3]			@ 11
780	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
781	ldrb	r3,[r1,#2]
782	ldrb	r0,[r1,#1]
783	orr	r2,r2,r3,lsl#8
784	ldrb	r3,[r1],#4
785	orr	r2,r2,r0,lsl#16
786# if 11==15
787	str	r1,[sp,#17*4]			@ make room for r1
788# endif
789	eor	r0,r5,r5,ror#5
790	orr	r2,r2,r3,lsl#24
791	eor	r0,r0,r5,ror#19	@ Sigma1(e)
792#endif
793	ldr	r3,[r14],#4			@ *K256++
794	add	r8,r8,r2			@ h+=X[i]
795	str	r2,[sp,#11*4]
796	eor	r2,r6,r7
797	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
798	and	r2,r2,r5
799	add	r8,r8,r3			@ h+=K256[i]
800	eor	r2,r2,r7			@ Ch(e,f,g)
801	eor	r0,r9,r9,ror#11
802	add	r8,r8,r2			@ h+=Ch(e,f,g)
803#if 11==31
804	and	r3,r3,#0xff
805	cmp	r3,#0xf2			@ done?
806#endif
807#if 11<15
808# if __ARM_ARCH__>=7
809	ldr	r2,[r1],#4			@ prefetch
810# else
811	ldrb	r2,[r1,#3]
812# endif
813	eor	r3,r9,r10			@ a^b, b^c in next round
814#else
815	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
816	eor	r3,r9,r10			@ a^b, b^c in next round
817	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
818#endif
819	eor	r0,r0,r9,ror#20	@ Sigma0(a)
820	and	r12,r12,r3			@ (b^c)&=(a^b)
821	add	r4,r4,r8			@ d+=h
822	eor	r12,r12,r10			@ Maj(a,b,c)
823	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
824	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
825#if __ARM_ARCH__>=7
826	@ ldr	r2,[r1],#4			@ 12
827# if 12==15
828	str	r1,[sp,#17*4]			@ make room for r1
829# endif
830	eor	r0,r4,r4,ror#5
831	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
832	eor	r0,r0,r4,ror#19	@ Sigma1(e)
833# ifndef __ARMEB__
834	rev	r2,r2
835# endif
836#else
837	@ ldrb	r2,[r1,#3]			@ 12
838	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
839	ldrb	r12,[r1,#2]
840	ldrb	r0,[r1,#1]
841	orr	r2,r2,r12,lsl#8
842	ldrb	r12,[r1],#4
843	orr	r2,r2,r0,lsl#16
844# if 12==15
845	str	r1,[sp,#17*4]			@ make room for r1
846# endif
847	eor	r0,r4,r4,ror#5
848	orr	r2,r2,r12,lsl#24
849	eor	r0,r0,r4,ror#19	@ Sigma1(e)
850#endif
851	ldr	r12,[r14],#4			@ *K256++
852	add	r7,r7,r2			@ h+=X[i]
853	str	r2,[sp,#12*4]
854	eor	r2,r5,r6
855	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
856	and	r2,r2,r4
857	add	r7,r7,r12			@ h+=K256[i]
858	eor	r2,r2,r6			@ Ch(e,f,g)
859	eor	r0,r8,r8,ror#11
860	add	r7,r7,r2			@ h+=Ch(e,f,g)
861#if 12==31
862	and	r12,r12,#0xff
863	cmp	r12,#0xf2			@ done?
864#endif
865#if 12<15
866# if __ARM_ARCH__>=7
867	ldr	r2,[r1],#4			@ prefetch
868# else
869	ldrb	r2,[r1,#3]
870# endif
871	eor	r12,r8,r9			@ a^b, b^c in next round
872#else
873	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
874	eor	r12,r8,r9			@ a^b, b^c in next round
875	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
876#endif
877	eor	r0,r0,r8,ror#20	@ Sigma0(a)
878	and	r3,r3,r12			@ (b^c)&=(a^b)
879	add	r11,r11,r7			@ d+=h
880	eor	r3,r3,r9			@ Maj(a,b,c)
881	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
882	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
883#if __ARM_ARCH__>=7
884	@ ldr	r2,[r1],#4			@ 13
885# if 13==15
886	str	r1,[sp,#17*4]			@ make room for r1
887# endif
888	eor	r0,r11,r11,ror#5
889	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
890	eor	r0,r0,r11,ror#19	@ Sigma1(e)
891# ifndef __ARMEB__
892	rev	r2,r2
893# endif
894#else
895	@ ldrb	r2,[r1,#3]			@ 13
896	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
897	ldrb	r3,[r1,#2]
898	ldrb	r0,[r1,#1]
899	orr	r2,r2,r3,lsl#8
900	ldrb	r3,[r1],#4
901	orr	r2,r2,r0,lsl#16
902# if 13==15
903	str	r1,[sp,#17*4]			@ make room for r1
904# endif
905	eor	r0,r11,r11,ror#5
906	orr	r2,r2,r3,lsl#24
907	eor	r0,r0,r11,ror#19	@ Sigma1(e)
908#endif
909	ldr	r3,[r14],#4			@ *K256++
910	add	r6,r6,r2			@ h+=X[i]
911	str	r2,[sp,#13*4]
912	eor	r2,r4,r5
913	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
914	and	r2,r2,r11
915	add	r6,r6,r3			@ h+=K256[i]
916	eor	r2,r2,r5			@ Ch(e,f,g)
917	eor	r0,r7,r7,ror#11
918	add	r6,r6,r2			@ h+=Ch(e,f,g)
919#if 13==31
920	and	r3,r3,#0xff
921	cmp	r3,#0xf2			@ done?
922#endif
923#if 13<15
924# if __ARM_ARCH__>=7
925	ldr	r2,[r1],#4			@ prefetch
926# else
927	ldrb	r2,[r1,#3]
928# endif
929	eor	r3,r7,r8			@ a^b, b^c in next round
930#else
931	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
932	eor	r3,r7,r8			@ a^b, b^c in next round
933	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
934#endif
935	eor	r0,r0,r7,ror#20	@ Sigma0(a)
936	and	r12,r12,r3			@ (b^c)&=(a^b)
937	add	r10,r10,r6			@ d+=h
938	eor	r12,r12,r8			@ Maj(a,b,c)
939	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
940	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
941#if __ARM_ARCH__>=7
942	@ ldr	r2,[r1],#4			@ 14
943# if 14==15
944	str	r1,[sp,#17*4]			@ make room for r1
945# endif
946	eor	r0,r10,r10,ror#5
947	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
948	eor	r0,r0,r10,ror#19	@ Sigma1(e)
949# ifndef __ARMEB__
950	rev	r2,r2
951# endif
952#else
953	@ ldrb	r2,[r1,#3]			@ 14
954	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
955	ldrb	r12,[r1,#2]
956	ldrb	r0,[r1,#1]
957	orr	r2,r2,r12,lsl#8
958	ldrb	r12,[r1],#4
959	orr	r2,r2,r0,lsl#16
960# if 14==15
961	str	r1,[sp,#17*4]			@ make room for r1
962# endif
963	eor	r0,r10,r10,ror#5
964	orr	r2,r2,r12,lsl#24
965	eor	r0,r0,r10,ror#19	@ Sigma1(e)
966#endif
967	ldr	r12,[r14],#4			@ *K256++
968	add	r5,r5,r2			@ h+=X[i]
969	str	r2,[sp,#14*4]
970	eor	r2,r11,r4
971	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
972	and	r2,r2,r10
973	add	r5,r5,r12			@ h+=K256[i]
974	eor	r2,r2,r4			@ Ch(e,f,g)
975	eor	r0,r6,r6,ror#11
976	add	r5,r5,r2			@ h+=Ch(e,f,g)
977#if 14==31
978	and	r12,r12,#0xff
979	cmp	r12,#0xf2			@ done?
980#endif
981#if 14<15
982# if __ARM_ARCH__>=7
983	ldr	r2,[r1],#4			@ prefetch
984# else
985	ldrb	r2,[r1,#3]
986# endif
987	eor	r12,r6,r7			@ a^b, b^c in next round
988#else
989	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
990	eor	r12,r6,r7			@ a^b, b^c in next round
991	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
992#endif
993	eor	r0,r0,r6,ror#20	@ Sigma0(a)
994	and	r3,r3,r12			@ (b^c)&=(a^b)
995	add	r9,r9,r5			@ d+=h
996	eor	r3,r3,r7			@ Maj(a,b,c)
997	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
998	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
999#if __ARM_ARCH__>=7
1000	@ ldr	r2,[r1],#4			@ 15
1001# if 15==15
1002	str	r1,[sp,#17*4]			@ make room for r1
1003# endif
1004	eor	r0,r9,r9,ror#5
1005	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1006	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1007# ifndef __ARMEB__
1008	rev	r2,r2
1009# endif
1010#else
1011	@ ldrb	r2,[r1,#3]			@ 15
1012	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1013	ldrb	r3,[r1,#2]
1014	ldrb	r0,[r1,#1]
1015	orr	r2,r2,r3,lsl#8
1016	ldrb	r3,[r1],#4
1017	orr	r2,r2,r0,lsl#16
1018# if 15==15
1019	str	r1,[sp,#17*4]			@ make room for r1
1020# endif
1021	eor	r0,r9,r9,ror#5
1022	orr	r2,r2,r3,lsl#24
1023	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1024#endif
1025	ldr	r3,[r14],#4			@ *K256++
1026	add	r4,r4,r2			@ h+=X[i]
1027	str	r2,[sp,#15*4]
1028	eor	r2,r10,r11
1029	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1030	and	r2,r2,r9
1031	add	r4,r4,r3			@ h+=K256[i]
1032	eor	r2,r2,r11			@ Ch(e,f,g)
1033	eor	r0,r5,r5,ror#11
1034	add	r4,r4,r2			@ h+=Ch(e,f,g)
1035#if 15==31
1036	and	r3,r3,#0xff
1037	cmp	r3,#0xf2			@ done?
1038#endif
1039#if 15<15
1040# if __ARM_ARCH__>=7
1041	ldr	r2,[r1],#4			@ prefetch
1042# else
1043	ldrb	r2,[r1,#3]
1044# endif
1045	eor	r3,r5,r6			@ a^b, b^c in next round
1046#else
1047	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1048	eor	r3,r5,r6			@ a^b, b^c in next round
1049	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1050#endif
1051	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1052	and	r12,r12,r3			@ (b^c)&=(a^b)
1053	add	r8,r8,r4			@ d+=h
1054	eor	r12,r12,r6			@ Maj(a,b,c)
1055	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1056	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1057Lrounds_16_xx:
1058	@ ldr	r2,[sp,#1*4]		@ 16
1059	@ ldr	r1,[sp,#14*4]
1060	mov	r0,r2,ror#7
1061	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1062	mov	r12,r1,ror#17
1063	eor	r0,r0,r2,ror#18
1064	eor	r12,r12,r1,ror#19
1065	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1066	ldr	r2,[sp,#0*4]
1067	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1068	ldr	r1,[sp,#9*4]
1069
1070	add	r12,r12,r0
1071	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1072	add	r2,r2,r12
1073	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1074	add	r2,r2,r1			@ X[i]
1075	ldr	r12,[r14],#4			@ *K256++
1076	add	r11,r11,r2			@ h+=X[i]
1077	str	r2,[sp,#0*4]
1078	eor	r2,r9,r10
1079	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1080	and	r2,r2,r8
1081	add	r11,r11,r12			@ h+=K256[i]
1082	eor	r2,r2,r10			@ Ch(e,f,g)
1083	eor	r0,r4,r4,ror#11
1084	add	r11,r11,r2			@ h+=Ch(e,f,g)
1085#if 16==31
1086	and	r12,r12,#0xff
1087	cmp	r12,#0xf2			@ done?
1088#endif
1089#if 16<15
1090# if __ARM_ARCH__>=7
1091	ldr	r2,[r1],#4			@ prefetch
1092# else
1093	ldrb	r2,[r1,#3]
1094# endif
1095	eor	r12,r4,r5			@ a^b, b^c in next round
1096#else
1097	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1098	eor	r12,r4,r5			@ a^b, b^c in next round
1099	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1100#endif
1101	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1102	and	r3,r3,r12			@ (b^c)&=(a^b)
1103	add	r7,r7,r11			@ d+=h
1104	eor	r3,r3,r5			@ Maj(a,b,c)
1105	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1106	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1107	@ ldr	r2,[sp,#2*4]		@ 17
1108	@ ldr	r1,[sp,#15*4]
1109	mov	r0,r2,ror#7
1110	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1111	mov	r3,r1,ror#17
1112	eor	r0,r0,r2,ror#18
1113	eor	r3,r3,r1,ror#19
1114	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1115	ldr	r2,[sp,#1*4]
1116	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1117	ldr	r1,[sp,#10*4]
1118
1119	add	r3,r3,r0
1120	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1121	add	r2,r2,r3
1122	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1123	add	r2,r2,r1			@ X[i]
1124	ldr	r3,[r14],#4			@ *K256++
1125	add	r10,r10,r2			@ h+=X[i]
1126	str	r2,[sp,#1*4]
1127	eor	r2,r8,r9
1128	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1129	and	r2,r2,r7
1130	add	r10,r10,r3			@ h+=K256[i]
1131	eor	r2,r2,r9			@ Ch(e,f,g)
1132	eor	r0,r11,r11,ror#11
1133	add	r10,r10,r2			@ h+=Ch(e,f,g)
1134#if 17==31
1135	and	r3,r3,#0xff
1136	cmp	r3,#0xf2			@ done?
1137#endif
1138#if 17<15
1139# if __ARM_ARCH__>=7
1140	ldr	r2,[r1],#4			@ prefetch
1141# else
1142	ldrb	r2,[r1,#3]
1143# endif
1144	eor	r3,r11,r4			@ a^b, b^c in next round
1145#else
1146	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1147	eor	r3,r11,r4			@ a^b, b^c in next round
1148	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1149#endif
1150	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1151	and	r12,r12,r3			@ (b^c)&=(a^b)
1152	add	r6,r6,r10			@ d+=h
1153	eor	r12,r12,r4			@ Maj(a,b,c)
1154	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1155	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1156	@ ldr	r2,[sp,#3*4]		@ 18
1157	@ ldr	r1,[sp,#0*4]
1158	mov	r0,r2,ror#7
1159	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1160	mov	r12,r1,ror#17
1161	eor	r0,r0,r2,ror#18
1162	eor	r12,r12,r1,ror#19
1163	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1164	ldr	r2,[sp,#2*4]
1165	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1166	ldr	r1,[sp,#11*4]
1167
1168	add	r12,r12,r0
1169	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1170	add	r2,r2,r12
1171	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1172	add	r2,r2,r1			@ X[i]
1173	ldr	r12,[r14],#4			@ *K256++
1174	add	r9,r9,r2			@ h+=X[i]
1175	str	r2,[sp,#2*4]
1176	eor	r2,r7,r8
1177	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1178	and	r2,r2,r6
1179	add	r9,r9,r12			@ h+=K256[i]
1180	eor	r2,r2,r8			@ Ch(e,f,g)
1181	eor	r0,r10,r10,ror#11
1182	add	r9,r9,r2			@ h+=Ch(e,f,g)
1183#if 18==31
1184	and	r12,r12,#0xff
1185	cmp	r12,#0xf2			@ done?
1186#endif
1187#if 18<15
1188# if __ARM_ARCH__>=7
1189	ldr	r2,[r1],#4			@ prefetch
1190# else
1191	ldrb	r2,[r1,#3]
1192# endif
1193	eor	r12,r10,r11			@ a^b, b^c in next round
1194#else
1195	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1196	eor	r12,r10,r11			@ a^b, b^c in next round
1197	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1198#endif
1199	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1200	and	r3,r3,r12			@ (b^c)&=(a^b)
1201	add	r5,r5,r9			@ d+=h
1202	eor	r3,r3,r11			@ Maj(a,b,c)
1203	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1204	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1205	@ ldr	r2,[sp,#4*4]		@ 19
1206	@ ldr	r1,[sp,#1*4]
1207	mov	r0,r2,ror#7
1208	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1209	mov	r3,r1,ror#17
1210	eor	r0,r0,r2,ror#18
1211	eor	r3,r3,r1,ror#19
1212	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1213	ldr	r2,[sp,#3*4]
1214	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1215	ldr	r1,[sp,#12*4]
1216
1217	add	r3,r3,r0
1218	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1219	add	r2,r2,r3
1220	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1221	add	r2,r2,r1			@ X[i]
1222	ldr	r3,[r14],#4			@ *K256++
1223	add	r8,r8,r2			@ h+=X[i]
1224	str	r2,[sp,#3*4]
1225	eor	r2,r6,r7
1226	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1227	and	r2,r2,r5
1228	add	r8,r8,r3			@ h+=K256[i]
1229	eor	r2,r2,r7			@ Ch(e,f,g)
1230	eor	r0,r9,r9,ror#11
1231	add	r8,r8,r2			@ h+=Ch(e,f,g)
1232#if 19==31
1233	and	r3,r3,#0xff
1234	cmp	r3,#0xf2			@ done?
1235#endif
1236#if 19<15
1237# if __ARM_ARCH__>=7
1238	ldr	r2,[r1],#4			@ prefetch
1239# else
1240	ldrb	r2,[r1,#3]
1241# endif
1242	eor	r3,r9,r10			@ a^b, b^c in next round
1243#else
1244	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1245	eor	r3,r9,r10			@ a^b, b^c in next round
1246	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1247#endif
1248	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1249	and	r12,r12,r3			@ (b^c)&=(a^b)
1250	add	r4,r4,r8			@ d+=h
1251	eor	r12,r12,r10			@ Maj(a,b,c)
1252	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1253	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1254	@ ldr	r2,[sp,#5*4]		@ 20
1255	@ ldr	r1,[sp,#2*4]
1256	mov	r0,r2,ror#7
1257	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1258	mov	r12,r1,ror#17
1259	eor	r0,r0,r2,ror#18
1260	eor	r12,r12,r1,ror#19
1261	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1262	ldr	r2,[sp,#4*4]
1263	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1264	ldr	r1,[sp,#13*4]
1265
1266	add	r12,r12,r0
1267	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1268	add	r2,r2,r12
1269	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1270	add	r2,r2,r1			@ X[i]
1271	ldr	r12,[r14],#4			@ *K256++
1272	add	r7,r7,r2			@ h+=X[i]
1273	str	r2,[sp,#4*4]
1274	eor	r2,r5,r6
1275	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1276	and	r2,r2,r4
1277	add	r7,r7,r12			@ h+=K256[i]
1278	eor	r2,r2,r6			@ Ch(e,f,g)
1279	eor	r0,r8,r8,ror#11
1280	add	r7,r7,r2			@ h+=Ch(e,f,g)
1281#if 20==31
1282	and	r12,r12,#0xff
1283	cmp	r12,#0xf2			@ done?
1284#endif
1285#if 20<15
1286# if __ARM_ARCH__>=7
1287	ldr	r2,[r1],#4			@ prefetch
1288# else
1289	ldrb	r2,[r1,#3]
1290# endif
1291	eor	r12,r8,r9			@ a^b, b^c in next round
1292#else
1293	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1294	eor	r12,r8,r9			@ a^b, b^c in next round
1295	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1296#endif
1297	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1298	and	r3,r3,r12			@ (b^c)&=(a^b)
1299	add	r11,r11,r7			@ d+=h
1300	eor	r3,r3,r9			@ Maj(a,b,c)
1301	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1302	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1303	@ ldr	r2,[sp,#6*4]		@ 21
1304	@ ldr	r1,[sp,#3*4]
1305	mov	r0,r2,ror#7
1306	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1307	mov	r3,r1,ror#17
1308	eor	r0,r0,r2,ror#18
1309	eor	r3,r3,r1,ror#19
1310	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1311	ldr	r2,[sp,#5*4]
1312	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1313	ldr	r1,[sp,#14*4]
1314
1315	add	r3,r3,r0
1316	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1317	add	r2,r2,r3
1318	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1319	add	r2,r2,r1			@ X[i]
1320	ldr	r3,[r14],#4			@ *K256++
1321	add	r6,r6,r2			@ h+=X[i]
1322	str	r2,[sp,#5*4]
1323	eor	r2,r4,r5
1324	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1325	and	r2,r2,r11
1326	add	r6,r6,r3			@ h+=K256[i]
1327	eor	r2,r2,r5			@ Ch(e,f,g)
1328	eor	r0,r7,r7,ror#11
1329	add	r6,r6,r2			@ h+=Ch(e,f,g)
1330#if 21==31
1331	and	r3,r3,#0xff
1332	cmp	r3,#0xf2			@ done?
1333#endif
1334#if 21<15
1335# if __ARM_ARCH__>=7
1336	ldr	r2,[r1],#4			@ prefetch
1337# else
1338	ldrb	r2,[r1,#3]
1339# endif
1340	eor	r3,r7,r8			@ a^b, b^c in next round
1341#else
1342	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1343	eor	r3,r7,r8			@ a^b, b^c in next round
1344	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1345#endif
1346	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1347	and	r12,r12,r3			@ (b^c)&=(a^b)
1348	add	r10,r10,r6			@ d+=h
1349	eor	r12,r12,r8			@ Maj(a,b,c)
1350	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1351	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1352	@ ldr	r2,[sp,#7*4]		@ 22
1353	@ ldr	r1,[sp,#4*4]
1354	mov	r0,r2,ror#7
1355	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1356	mov	r12,r1,ror#17
1357	eor	r0,r0,r2,ror#18
1358	eor	r12,r12,r1,ror#19
1359	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1360	ldr	r2,[sp,#6*4]
1361	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1362	ldr	r1,[sp,#15*4]
1363
1364	add	r12,r12,r0
1365	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1366	add	r2,r2,r12
1367	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1368	add	r2,r2,r1			@ X[i]
1369	ldr	r12,[r14],#4			@ *K256++
1370	add	r5,r5,r2			@ h+=X[i]
1371	str	r2,[sp,#6*4]
1372	eor	r2,r11,r4
1373	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1374	and	r2,r2,r10
1375	add	r5,r5,r12			@ h+=K256[i]
1376	eor	r2,r2,r4			@ Ch(e,f,g)
1377	eor	r0,r6,r6,ror#11
1378	add	r5,r5,r2			@ h+=Ch(e,f,g)
1379#if 22==31
1380	and	r12,r12,#0xff
1381	cmp	r12,#0xf2			@ done?
1382#endif
1383#if 22<15
1384# if __ARM_ARCH__>=7
1385	ldr	r2,[r1],#4			@ prefetch
1386# else
1387	ldrb	r2,[r1,#3]
1388# endif
1389	eor	r12,r6,r7			@ a^b, b^c in next round
1390#else
1391	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1392	eor	r12,r6,r7			@ a^b, b^c in next round
1393	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1394#endif
1395	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1396	and	r3,r3,r12			@ (b^c)&=(a^b)
1397	add	r9,r9,r5			@ d+=h
1398	eor	r3,r3,r7			@ Maj(a,b,c)
1399	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1400	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1401	@ ldr	r2,[sp,#8*4]		@ 23
1402	@ ldr	r1,[sp,#5*4]
1403	mov	r0,r2,ror#7
1404	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1405	mov	r3,r1,ror#17
1406	eor	r0,r0,r2,ror#18
1407	eor	r3,r3,r1,ror#19
1408	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1409	ldr	r2,[sp,#7*4]
1410	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1411	ldr	r1,[sp,#0*4]
1412
1413	add	r3,r3,r0
1414	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1415	add	r2,r2,r3
1416	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1417	add	r2,r2,r1			@ X[i]
1418	ldr	r3,[r14],#4			@ *K256++
1419	add	r4,r4,r2			@ h+=X[i]
1420	str	r2,[sp,#7*4]
1421	eor	r2,r10,r11
1422	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1423	and	r2,r2,r9
1424	add	r4,r4,r3			@ h+=K256[i]
1425	eor	r2,r2,r11			@ Ch(e,f,g)
1426	eor	r0,r5,r5,ror#11
1427	add	r4,r4,r2			@ h+=Ch(e,f,g)
1428#if 23==31
1429	and	r3,r3,#0xff
1430	cmp	r3,#0xf2			@ done?
1431#endif
1432#if 23<15
1433# if __ARM_ARCH__>=7
1434	ldr	r2,[r1],#4			@ prefetch
1435# else
1436	ldrb	r2,[r1,#3]
1437# endif
1438	eor	r3,r5,r6			@ a^b, b^c in next round
1439#else
1440	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1441	eor	r3,r5,r6			@ a^b, b^c in next round
1442	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1443#endif
1444	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1445	and	r12,r12,r3			@ (b^c)&=(a^b)
1446	add	r8,r8,r4			@ d+=h
1447	eor	r12,r12,r6			@ Maj(a,b,c)
1448	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1449	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1450	@ ldr	r2,[sp,#9*4]		@ 24
1451	@ ldr	r1,[sp,#6*4]
1452	mov	r0,r2,ror#7
1453	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1454	mov	r12,r1,ror#17
1455	eor	r0,r0,r2,ror#18
1456	eor	r12,r12,r1,ror#19
1457	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1458	ldr	r2,[sp,#8*4]
1459	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1460	ldr	r1,[sp,#1*4]
1461
1462	add	r12,r12,r0
1463	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1464	add	r2,r2,r12
1465	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1466	add	r2,r2,r1			@ X[i]
1467	ldr	r12,[r14],#4			@ *K256++
1468	add	r11,r11,r2			@ h+=X[i]
1469	str	r2,[sp,#8*4]
1470	eor	r2,r9,r10
1471	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1472	and	r2,r2,r8
1473	add	r11,r11,r12			@ h+=K256[i]
1474	eor	r2,r2,r10			@ Ch(e,f,g)
1475	eor	r0,r4,r4,ror#11
1476	add	r11,r11,r2			@ h+=Ch(e,f,g)
1477#if 24==31
1478	and	r12,r12,#0xff
1479	cmp	r12,#0xf2			@ done?
1480#endif
1481#if 24<15
1482# if __ARM_ARCH__>=7
1483	ldr	r2,[r1],#4			@ prefetch
1484# else
1485	ldrb	r2,[r1,#3]
1486# endif
1487	eor	r12,r4,r5			@ a^b, b^c in next round
1488#else
1489	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1490	eor	r12,r4,r5			@ a^b, b^c in next round
1491	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1492#endif
1493	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1494	and	r3,r3,r12			@ (b^c)&=(a^b)
1495	add	r7,r7,r11			@ d+=h
1496	eor	r3,r3,r5			@ Maj(a,b,c)
1497	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1498	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1499	@ ldr	r2,[sp,#10*4]		@ 25
1500	@ ldr	r1,[sp,#7*4]
1501	mov	r0,r2,ror#7
1502	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1503	mov	r3,r1,ror#17
1504	eor	r0,r0,r2,ror#18
1505	eor	r3,r3,r1,ror#19
1506	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1507	ldr	r2,[sp,#9*4]
1508	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1509	ldr	r1,[sp,#2*4]
1510
1511	add	r3,r3,r0
1512	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1513	add	r2,r2,r3
1514	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1515	add	r2,r2,r1			@ X[i]
1516	ldr	r3,[r14],#4			@ *K256++
1517	add	r10,r10,r2			@ h+=X[i]
1518	str	r2,[sp,#9*4]
1519	eor	r2,r8,r9
1520	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1521	and	r2,r2,r7
1522	add	r10,r10,r3			@ h+=K256[i]
1523	eor	r2,r2,r9			@ Ch(e,f,g)
1524	eor	r0,r11,r11,ror#11
1525	add	r10,r10,r2			@ h+=Ch(e,f,g)
1526#if 25==31
1527	and	r3,r3,#0xff
1528	cmp	r3,#0xf2			@ done?
1529#endif
1530#if 25<15
1531# if __ARM_ARCH__>=7
1532	ldr	r2,[r1],#4			@ prefetch
1533# else
1534	ldrb	r2,[r1,#3]
1535# endif
1536	eor	r3,r11,r4			@ a^b, b^c in next round
1537#else
1538	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1539	eor	r3,r11,r4			@ a^b, b^c in next round
1540	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1541#endif
1542	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1543	and	r12,r12,r3			@ (b^c)&=(a^b)
1544	add	r6,r6,r10			@ d+=h
1545	eor	r12,r12,r4			@ Maj(a,b,c)
1546	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1547	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1548	@ ldr	r2,[sp,#11*4]		@ 26
1549	@ ldr	r1,[sp,#8*4]
1550	mov	r0,r2,ror#7
1551	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1552	mov	r12,r1,ror#17
1553	eor	r0,r0,r2,ror#18
1554	eor	r12,r12,r1,ror#19
1555	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1556	ldr	r2,[sp,#10*4]
1557	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1558	ldr	r1,[sp,#3*4]
1559
1560	add	r12,r12,r0
1561	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1562	add	r2,r2,r12
1563	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1564	add	r2,r2,r1			@ X[i]
1565	ldr	r12,[r14],#4			@ *K256++
1566	add	r9,r9,r2			@ h+=X[i]
1567	str	r2,[sp,#10*4]
1568	eor	r2,r7,r8
1569	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1570	and	r2,r2,r6
1571	add	r9,r9,r12			@ h+=K256[i]
1572	eor	r2,r2,r8			@ Ch(e,f,g)
1573	eor	r0,r10,r10,ror#11
1574	add	r9,r9,r2			@ h+=Ch(e,f,g)
1575#if 26==31
1576	and	r12,r12,#0xff
1577	cmp	r12,#0xf2			@ done?
1578#endif
1579#if 26<15
1580# if __ARM_ARCH__>=7
1581	ldr	r2,[r1],#4			@ prefetch
1582# else
1583	ldrb	r2,[r1,#3]
1584# endif
1585	eor	r12,r10,r11			@ a^b, b^c in next round
1586#else
1587	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1588	eor	r12,r10,r11			@ a^b, b^c in next round
1589	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1590#endif
1591	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1592	and	r3,r3,r12			@ (b^c)&=(a^b)
1593	add	r5,r5,r9			@ d+=h
1594	eor	r3,r3,r11			@ Maj(a,b,c)
1595	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1596	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1597	@ ldr	r2,[sp,#12*4]		@ 27
1598	@ ldr	r1,[sp,#9*4]
1599	mov	r0,r2,ror#7
1600	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1601	mov	r3,r1,ror#17
1602	eor	r0,r0,r2,ror#18
1603	eor	r3,r3,r1,ror#19
1604	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1605	ldr	r2,[sp,#11*4]
1606	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1607	ldr	r1,[sp,#4*4]
1608
1609	add	r3,r3,r0
1610	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1611	add	r2,r2,r3
1612	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1613	add	r2,r2,r1			@ X[i]
1614	ldr	r3,[r14],#4			@ *K256++
1615	add	r8,r8,r2			@ h+=X[i]
1616	str	r2,[sp,#11*4]
1617	eor	r2,r6,r7
1618	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1619	and	r2,r2,r5
1620	add	r8,r8,r3			@ h+=K256[i]
1621	eor	r2,r2,r7			@ Ch(e,f,g)
1622	eor	r0,r9,r9,ror#11
1623	add	r8,r8,r2			@ h+=Ch(e,f,g)
1624#if 27==31
1625	and	r3,r3,#0xff
1626	cmp	r3,#0xf2			@ done?
1627#endif
1628#if 27<15
1629# if __ARM_ARCH__>=7
1630	ldr	r2,[r1],#4			@ prefetch
1631# else
1632	ldrb	r2,[r1,#3]
1633# endif
1634	eor	r3,r9,r10			@ a^b, b^c in next round
1635#else
1636	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1637	eor	r3,r9,r10			@ a^b, b^c in next round
1638	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1639#endif
1640	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1641	and	r12,r12,r3			@ (b^c)&=(a^b)
1642	add	r4,r4,r8			@ d+=h
1643	eor	r12,r12,r10			@ Maj(a,b,c)
1644	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1645	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1646	@ ldr	r2,[sp,#13*4]		@ 28
1647	@ ldr	r1,[sp,#10*4]
1648	mov	r0,r2,ror#7
1649	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1650	mov	r12,r1,ror#17
1651	eor	r0,r0,r2,ror#18
1652	eor	r12,r12,r1,ror#19
1653	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1654	ldr	r2,[sp,#12*4]
1655	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1656	ldr	r1,[sp,#5*4]
1657
1658	add	r12,r12,r0
1659	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1660	add	r2,r2,r12
1661	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1662	add	r2,r2,r1			@ X[i]
1663	ldr	r12,[r14],#4			@ *K256++
1664	add	r7,r7,r2			@ h+=X[i]
1665	str	r2,[sp,#12*4]
1666	eor	r2,r5,r6
1667	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1668	and	r2,r2,r4
1669	add	r7,r7,r12			@ h+=K256[i]
1670	eor	r2,r2,r6			@ Ch(e,f,g)
1671	eor	r0,r8,r8,ror#11
1672	add	r7,r7,r2			@ h+=Ch(e,f,g)
1673#if 28==31
1674	and	r12,r12,#0xff
1675	cmp	r12,#0xf2			@ done?
1676#endif
1677#if 28<15
1678# if __ARM_ARCH__>=7
1679	ldr	r2,[r1],#4			@ prefetch
1680# else
1681	ldrb	r2,[r1,#3]
1682# endif
1683	eor	r12,r8,r9			@ a^b, b^c in next round
1684#else
1685	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1686	eor	r12,r8,r9			@ a^b, b^c in next round
1687	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1688#endif
1689	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1690	and	r3,r3,r12			@ (b^c)&=(a^b)
1691	add	r11,r11,r7			@ d+=h
1692	eor	r3,r3,r9			@ Maj(a,b,c)
1693	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1694	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1695	@ ldr	r2,[sp,#14*4]		@ 29
1696	@ ldr	r1,[sp,#11*4]
1697	mov	r0,r2,ror#7
1698	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1699	mov	r3,r1,ror#17
1700	eor	r0,r0,r2,ror#18
1701	eor	r3,r3,r1,ror#19
1702	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1703	ldr	r2,[sp,#13*4]
1704	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1705	ldr	r1,[sp,#6*4]
1706
1707	add	r3,r3,r0
1708	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1709	add	r2,r2,r3
1710	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1711	add	r2,r2,r1			@ X[i]
1712	ldr	r3,[r14],#4			@ *K256++
1713	add	r6,r6,r2			@ h+=X[i]
1714	str	r2,[sp,#13*4]
1715	eor	r2,r4,r5
1716	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1717	and	r2,r2,r11
1718	add	r6,r6,r3			@ h+=K256[i]
1719	eor	r2,r2,r5			@ Ch(e,f,g)
1720	eor	r0,r7,r7,ror#11
1721	add	r6,r6,r2			@ h+=Ch(e,f,g)
1722#if 29==31
1723	and	r3,r3,#0xff
1724	cmp	r3,#0xf2			@ done?
1725#endif
1726#if 29<15
1727# if __ARM_ARCH__>=7
1728	ldr	r2,[r1],#4			@ prefetch
1729# else
1730	ldrb	r2,[r1,#3]
1731# endif
1732	eor	r3,r7,r8			@ a^b, b^c in next round
1733#else
1734	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1735	eor	r3,r7,r8			@ a^b, b^c in next round
1736	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1737#endif
1738	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1739	and	r12,r12,r3			@ (b^c)&=(a^b)
1740	add	r10,r10,r6			@ d+=h
1741	eor	r12,r12,r8			@ Maj(a,b,c)
1742	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1743	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1744	@ ldr	r2,[sp,#15*4]		@ 30
1745	@ ldr	r1,[sp,#12*4]
1746	mov	r0,r2,ror#7
1747	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1748	mov	r12,r1,ror#17
1749	eor	r0,r0,r2,ror#18
1750	eor	r12,r12,r1,ror#19
1751	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1752	ldr	r2,[sp,#14*4]
1753	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1754	ldr	r1,[sp,#7*4]
1755
1756	add	r12,r12,r0
1757	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1758	add	r2,r2,r12
1759	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1760	add	r2,r2,r1			@ X[i]
1761	ldr	r12,[r14],#4			@ *K256++
1762	add	r5,r5,r2			@ h+=X[i]
1763	str	r2,[sp,#14*4]
1764	eor	r2,r11,r4
1765	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1766	and	r2,r2,r10
1767	add	r5,r5,r12			@ h+=K256[i]
1768	eor	r2,r2,r4			@ Ch(e,f,g)
1769	eor	r0,r6,r6,ror#11
1770	add	r5,r5,r2			@ h+=Ch(e,f,g)
1771#if 30==31
1772	and	r12,r12,#0xff
1773	cmp	r12,#0xf2			@ done?
1774#endif
1775#if 30<15
1776# if __ARM_ARCH__>=7
1777	ldr	r2,[r1],#4			@ prefetch
1778# else
1779	ldrb	r2,[r1,#3]
1780# endif
1781	eor	r12,r6,r7			@ a^b, b^c in next round
1782#else
1783	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1784	eor	r12,r6,r7			@ a^b, b^c in next round
1785	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1786#endif
1787	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1788	and	r3,r3,r12			@ (b^c)&=(a^b)
1789	add	r9,r9,r5			@ d+=h
1790	eor	r3,r3,r7			@ Maj(a,b,c)
1791	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1792	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1793	@ ldr	r2,[sp,#0*4]		@ 31
1794	@ ldr	r1,[sp,#13*4]
1795	mov	r0,r2,ror#7
1796	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1797	mov	r3,r1,ror#17
1798	eor	r0,r0,r2,ror#18
1799	eor	r3,r3,r1,ror#19
1800	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1801	ldr	r2,[sp,#15*4]
1802	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1803	ldr	r1,[sp,#8*4]
1804
1805	add	r3,r3,r0
1806	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1807	add	r2,r2,r3
1808	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1809	add	r2,r2,r1			@ X[i]
1810	ldr	r3,[r14],#4			@ *K256++
1811	add	r4,r4,r2			@ h+=X[i]
1812	str	r2,[sp,#15*4]
1813	eor	r2,r10,r11
1814	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1815	and	r2,r2,r9
1816	add	r4,r4,r3			@ h+=K256[i]
1817	eor	r2,r2,r11			@ Ch(e,f,g)
1818	eor	r0,r5,r5,ror#11
1819	add	r4,r4,r2			@ h+=Ch(e,f,g)
1820#if 31==31
1821	and	r3,r3,#0xff
1822	cmp	r3,#0xf2			@ done?
1823#endif
1824#if 31<15
1825# if __ARM_ARCH__>=7
1826	ldr	r2,[r1],#4			@ prefetch
1827# else
1828	ldrb	r2,[r1,#3]
1829# endif
1830	eor	r3,r5,r6			@ a^b, b^c in next round
1831#else
1832	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1833	eor	r3,r5,r6			@ a^b, b^c in next round
1834	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1835#endif
1836	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1837	and	r12,r12,r3			@ (b^c)&=(a^b)
1838	add	r8,r8,r4			@ d+=h
1839	eor	r12,r12,r6			@ Maj(a,b,c)
1840	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1841	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
@ End of the 16..63 round loop.  The Z flag was set by the "cmp r3,#0xf2"
@ above when the last K256 word (0xc67178f2) was consumed; while Z is set
@ we fall through, otherwise we loop back for another 16 rounds.
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	Lrounds_16_xx

	@ Fold in the Maj(a,b,c) term that the software-pipelined round above
	@ deferred, then add the working variables (r4..r11 = a..h) back into
	@ the hash state pointed at by r3.
	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	@ More input blocks left?  r14 is rewound to the start of the K256
	@ table (64 words = 256 bytes were consumed this block).
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	@ ARMv4 has no pop-to-pc interworking: restore lr, then use "bx lr"
	@ (hand-encoded below, since the assembler may reject it for v4).
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
1881
1882#if __ARM_MAX_ARCH__>=7
1883
1884
1885
@ void sha256_block_data_order_neon(uint32_t *ctx, const void *inp, size_t num)
@   r0 = ctx (8-word hash state), r1 = input, r2 = number of 64-byte blocks
@ NEON code path: the message schedule for four rounds at a time is computed
@ in q0-q3 while the rounds themselves run on the integer core.
.globl	_sha256_block_data_order_neon
.private_extern	_sha256_block_data_order_neon
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order_neon
#endif
.align	5
.skip	16
_sha256_block_data_order_neon:
LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	@ Carve out a 16-byte-aligned scratch frame of 16 words (the rolling
	@ X[i]+K[i] buffer) plus 16 bytes of saved context below current sp.
	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	@ Load the first 64-byte block (q0-q3) and the first 16 K256 words
	@ (q8-q11); interleave byte-swapping with spilling ctx/inp/limit/sp
	@ into the frame at sp+64..sp+76.
	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	@ Precompute X[0..15]+K[0..15] and park them in the scratch buffer
	@ for the scalar round code to consume via ldr r2,[sp,#i*4].
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!

	@ Load working state a..h into r4..r11; r12/r3 seed the alternating
	@ Maj accumulator pipeline used by the round bodies.
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	L_00_48
1936
1937.align	4
1938L_00_48:
1939	vext.8	q8,q0,q1,#4
1940	add	r11,r11,r2
1941	eor	r2,r9,r10
1942	eor	r0,r8,r8,ror#5
1943	vext.8	q9,q2,q3,#4
1944	add	r4,r4,r12
1945	and	r2,r2,r8
1946	eor	r12,r0,r8,ror#19
1947	vshr.u32	q10,q8,#7
1948	eor	r0,r4,r4,ror#11
1949	eor	r2,r2,r10
1950	vadd.i32	q0,q0,q9
1951	add	r11,r11,r12,ror#6
1952	eor	r12,r4,r5
1953	vshr.u32	q9,q8,#3
1954	eor	r0,r0,r4,ror#20
1955	add	r11,r11,r2
1956	vsli.32	q10,q8,#25
1957	ldr	r2,[sp,#4]
1958	and	r3,r3,r12
1959	vshr.u32	q11,q8,#18
1960	add	r7,r7,r11
1961	add	r11,r11,r0,ror#2
1962	eor	r3,r3,r5
1963	veor	q9,q9,q10
1964	add	r10,r10,r2
1965	vsli.32	q11,q8,#14
1966	eor	r2,r8,r9
1967	eor	r0,r7,r7,ror#5
1968	vshr.u32	d24,d7,#17
1969	add	r11,r11,r3
1970	and	r2,r2,r7
1971	veor	q9,q9,q11
1972	eor	r3,r0,r7,ror#19
1973	eor	r0,r11,r11,ror#11
1974	vsli.32	d24,d7,#15
1975	eor	r2,r2,r9
1976	add	r10,r10,r3,ror#6
1977	vshr.u32	d25,d7,#10
1978	eor	r3,r11,r4
1979	eor	r0,r0,r11,ror#20
1980	vadd.i32	q0,q0,q9
1981	add	r10,r10,r2
1982	ldr	r2,[sp,#8]
1983	veor	d25,d25,d24
1984	and	r12,r12,r3
1985	add	r6,r6,r10
1986	vshr.u32	d24,d7,#19
1987	add	r10,r10,r0,ror#2
1988	eor	r12,r12,r4
1989	vsli.32	d24,d7,#13
1990	add	r9,r9,r2
1991	eor	r2,r7,r8
1992	veor	d25,d25,d24
1993	eor	r0,r6,r6,ror#5
1994	add	r10,r10,r12
1995	vadd.i32	d0,d0,d25
1996	and	r2,r2,r6
1997	eor	r12,r0,r6,ror#19
1998	vshr.u32	d24,d0,#17
1999	eor	r0,r10,r10,ror#11
2000	eor	r2,r2,r8
2001	vsli.32	d24,d0,#15
2002	add	r9,r9,r12,ror#6
2003	eor	r12,r10,r11
2004	vshr.u32	d25,d0,#10
2005	eor	r0,r0,r10,ror#20
2006	add	r9,r9,r2
2007	veor	d25,d25,d24
2008	ldr	r2,[sp,#12]
2009	and	r3,r3,r12
2010	vshr.u32	d24,d0,#19
2011	add	r5,r5,r9
2012	add	r9,r9,r0,ror#2
2013	eor	r3,r3,r11
2014	vld1.32	{q8},[r14,:128]!
2015	add	r8,r8,r2
2016	vsli.32	d24,d0,#13
2017	eor	r2,r6,r7
2018	eor	r0,r5,r5,ror#5
2019	veor	d25,d25,d24
2020	add	r9,r9,r3
2021	and	r2,r2,r5
2022	vadd.i32	d1,d1,d25
2023	eor	r3,r0,r5,ror#19
2024	eor	r0,r9,r9,ror#11
2025	vadd.i32	q8,q8,q0
2026	eor	r2,r2,r7
2027	add	r8,r8,r3,ror#6
2028	eor	r3,r9,r10
2029	eor	r0,r0,r9,ror#20
2030	add	r8,r8,r2
2031	ldr	r2,[sp,#16]
2032	and	r12,r12,r3
2033	add	r4,r4,r8
2034	vst1.32	{q8},[r1,:128]!
2035	add	r8,r8,r0,ror#2
2036	eor	r12,r12,r10
2037	vext.8	q8,q1,q2,#4
2038	add	r7,r7,r2
2039	eor	r2,r5,r6
2040	eor	r0,r4,r4,ror#5
2041	vext.8	q9,q3,q0,#4
2042	add	r8,r8,r12
2043	and	r2,r2,r4
2044	eor	r12,r0,r4,ror#19
2045	vshr.u32	q10,q8,#7
2046	eor	r0,r8,r8,ror#11
2047	eor	r2,r2,r6
2048	vadd.i32	q1,q1,q9
2049	add	r7,r7,r12,ror#6
2050	eor	r12,r8,r9
2051	vshr.u32	q9,q8,#3
2052	eor	r0,r0,r8,ror#20
2053	add	r7,r7,r2
2054	vsli.32	q10,q8,#25
2055	ldr	r2,[sp,#20]
2056	and	r3,r3,r12
2057	vshr.u32	q11,q8,#18
2058	add	r11,r11,r7
2059	add	r7,r7,r0,ror#2
2060	eor	r3,r3,r9
2061	veor	q9,q9,q10
2062	add	r6,r6,r2
2063	vsli.32	q11,q8,#14
2064	eor	r2,r4,r5
2065	eor	r0,r11,r11,ror#5
2066	vshr.u32	d24,d1,#17
2067	add	r7,r7,r3
2068	and	r2,r2,r11
2069	veor	q9,q9,q11
2070	eor	r3,r0,r11,ror#19
2071	eor	r0,r7,r7,ror#11
2072	vsli.32	d24,d1,#15
2073	eor	r2,r2,r5
2074	add	r6,r6,r3,ror#6
2075	vshr.u32	d25,d1,#10
2076	eor	r3,r7,r8
2077	eor	r0,r0,r7,ror#20
2078	vadd.i32	q1,q1,q9
2079	add	r6,r6,r2
2080	ldr	r2,[sp,#24]
2081	veor	d25,d25,d24
2082	and	r12,r12,r3
2083	add	r10,r10,r6
2084	vshr.u32	d24,d1,#19
2085	add	r6,r6,r0,ror#2
2086	eor	r12,r12,r8
2087	vsli.32	d24,d1,#13
2088	add	r5,r5,r2
2089	eor	r2,r11,r4
2090	veor	d25,d25,d24
2091	eor	r0,r10,r10,ror#5
2092	add	r6,r6,r12
2093	vadd.i32	d2,d2,d25
2094	and	r2,r2,r10
2095	eor	r12,r0,r10,ror#19
2096	vshr.u32	d24,d2,#17
2097	eor	r0,r6,r6,ror#11
2098	eor	r2,r2,r4
2099	vsli.32	d24,d2,#15
2100	add	r5,r5,r12,ror#6
2101	eor	r12,r6,r7
2102	vshr.u32	d25,d2,#10
2103	eor	r0,r0,r6,ror#20
2104	add	r5,r5,r2
2105	veor	d25,d25,d24
2106	ldr	r2,[sp,#28]
2107	and	r3,r3,r12
2108	vshr.u32	d24,d2,#19
2109	add	r9,r9,r5
2110	add	r5,r5,r0,ror#2
2111	eor	r3,r3,r7
2112	vld1.32	{q8},[r14,:128]!
2113	add	r4,r4,r2
2114	vsli.32	d24,d2,#13
2115	eor	r2,r10,r11
2116	eor	r0,r9,r9,ror#5
2117	veor	d25,d25,d24
2118	add	r5,r5,r3
2119	and	r2,r2,r9
2120	vadd.i32	d3,d3,d25
2121	eor	r3,r0,r9,ror#19
2122	eor	r0,r5,r5,ror#11
2123	vadd.i32	q8,q8,q1
2124	eor	r2,r2,r11
2125	add	r4,r4,r3,ror#6
2126	eor	r3,r5,r6
2127	eor	r0,r0,r5,ror#20
2128	add	r4,r4,r2
2129	ldr	r2,[sp,#32]
2130	and	r12,r12,r3
2131	add	r8,r8,r4
2132	vst1.32	{q8},[r1,:128]!
2133	add	r4,r4,r0,ror#2
2134	eor	r12,r12,r6
2135	vext.8	q8,q2,q3,#4
2136	add	r11,r11,r2
2137	eor	r2,r9,r10
2138	eor	r0,r8,r8,ror#5
2139	vext.8	q9,q0,q1,#4
2140	add	r4,r4,r12
2141	and	r2,r2,r8
2142	eor	r12,r0,r8,ror#19
2143	vshr.u32	q10,q8,#7
2144	eor	r0,r4,r4,ror#11
2145	eor	r2,r2,r10
2146	vadd.i32	q2,q2,q9
2147	add	r11,r11,r12,ror#6
2148	eor	r12,r4,r5
2149	vshr.u32	q9,q8,#3
2150	eor	r0,r0,r4,ror#20
2151	add	r11,r11,r2
2152	vsli.32	q10,q8,#25
2153	ldr	r2,[sp,#36]
2154	and	r3,r3,r12
2155	vshr.u32	q11,q8,#18
2156	add	r7,r7,r11
2157	add	r11,r11,r0,ror#2
2158	eor	r3,r3,r5
2159	veor	q9,q9,q10
2160	add	r10,r10,r2
2161	vsli.32	q11,q8,#14
2162	eor	r2,r8,r9
2163	eor	r0,r7,r7,ror#5
2164	vshr.u32	d24,d3,#17
2165	add	r11,r11,r3
2166	and	r2,r2,r7
2167	veor	q9,q9,q11
2168	eor	r3,r0,r7,ror#19
2169	eor	r0,r11,r11,ror#11
2170	vsli.32	d24,d3,#15
2171	eor	r2,r2,r9
2172	add	r10,r10,r3,ror#6
2173	vshr.u32	d25,d3,#10
2174	eor	r3,r11,r4
2175	eor	r0,r0,r11,ror#20
2176	vadd.i32	q2,q2,q9
2177	add	r10,r10,r2
2178	ldr	r2,[sp,#40]
2179	veor	d25,d25,d24
2180	and	r12,r12,r3
2181	add	r6,r6,r10
2182	vshr.u32	d24,d3,#19
2183	add	r10,r10,r0,ror#2
2184	eor	r12,r12,r4
2185	vsli.32	d24,d3,#13
2186	add	r9,r9,r2
2187	eor	r2,r7,r8
2188	veor	d25,d25,d24
2189	eor	r0,r6,r6,ror#5
2190	add	r10,r10,r12
2191	vadd.i32	d4,d4,d25
2192	and	r2,r2,r6
2193	eor	r12,r0,r6,ror#19
2194	vshr.u32	d24,d4,#17
2195	eor	r0,r10,r10,ror#11
2196	eor	r2,r2,r8
2197	vsli.32	d24,d4,#15
2198	add	r9,r9,r12,ror#6
2199	eor	r12,r10,r11
2200	vshr.u32	d25,d4,#10
2201	eor	r0,r0,r10,ror#20
2202	add	r9,r9,r2
2203	veor	d25,d25,d24
2204	ldr	r2,[sp,#44]
2205	and	r3,r3,r12
2206	vshr.u32	d24,d4,#19
2207	add	r5,r5,r9
2208	add	r9,r9,r0,ror#2
2209	eor	r3,r3,r11
2210	vld1.32	{q8},[r14,:128]!
2211	add	r8,r8,r2
2212	vsli.32	d24,d4,#13
2213	eor	r2,r6,r7
2214	eor	r0,r5,r5,ror#5
2215	veor	d25,d25,d24
2216	add	r9,r9,r3
2217	and	r2,r2,r5
2218	vadd.i32	d5,d5,d25
2219	eor	r3,r0,r5,ror#19
2220	eor	r0,r9,r9,ror#11
2221	vadd.i32	q8,q8,q2
2222	eor	r2,r2,r7
2223	add	r8,r8,r3,ror#6
2224	eor	r3,r9,r10
2225	eor	r0,r0,r9,ror#20
2226	add	r8,r8,r2
2227	ldr	r2,[sp,#48]
2228	and	r12,r12,r3
2229	add	r4,r4,r8
2230	vst1.32	{q8},[r1,:128]!
2231	add	r8,r8,r0,ror#2
2232	eor	r12,r12,r10
2233	vext.8	q8,q3,q0,#4
2234	add	r7,r7,r2
2235	eor	r2,r5,r6
2236	eor	r0,r4,r4,ror#5
2237	vext.8	q9,q1,q2,#4
2238	add	r8,r8,r12
2239	and	r2,r2,r4
2240	eor	r12,r0,r4,ror#19
2241	vshr.u32	q10,q8,#7
2242	eor	r0,r8,r8,ror#11
2243	eor	r2,r2,r6
2244	vadd.i32	q3,q3,q9
2245	add	r7,r7,r12,ror#6
2246	eor	r12,r8,r9
2247	vshr.u32	q9,q8,#3
2248	eor	r0,r0,r8,ror#20
2249	add	r7,r7,r2
2250	vsli.32	q10,q8,#25
2251	ldr	r2,[sp,#52]
2252	and	r3,r3,r12
2253	vshr.u32	q11,q8,#18
2254	add	r11,r11,r7
2255	add	r7,r7,r0,ror#2
2256	eor	r3,r3,r9
2257	veor	q9,q9,q10
2258	add	r6,r6,r2
2259	vsli.32	q11,q8,#14
2260	eor	r2,r4,r5
2261	eor	r0,r11,r11,ror#5
2262	vshr.u32	d24,d5,#17
2263	add	r7,r7,r3
2264	and	r2,r2,r11
2265	veor	q9,q9,q11
2266	eor	r3,r0,r11,ror#19
2267	eor	r0,r7,r7,ror#11
2268	vsli.32	d24,d5,#15
2269	eor	r2,r2,r5
2270	add	r6,r6,r3,ror#6
2271	vshr.u32	d25,d5,#10
2272	eor	r3,r7,r8
2273	eor	r0,r0,r7,ror#20
2274	vadd.i32	q3,q3,q9
2275	add	r6,r6,r2
2276	ldr	r2,[sp,#56]
2277	veor	d25,d25,d24
2278	and	r12,r12,r3
2279	add	r10,r10,r6
2280	vshr.u32	d24,d5,#19
2281	add	r6,r6,r0,ror#2
2282	eor	r12,r12,r8
2283	vsli.32	d24,d5,#13
2284	add	r5,r5,r2
2285	eor	r2,r11,r4
2286	veor	d25,d25,d24
2287	eor	r0,r10,r10,ror#5
2288	add	r6,r6,r12
2289	vadd.i32	d6,d6,d25
2290	and	r2,r2,r10
2291	eor	r12,r0,r10,ror#19
2292	vshr.u32	d24,d6,#17
2293	eor	r0,r6,r6,ror#11
2294	eor	r2,r2,r4
2295	vsli.32	d24,d6,#15
2296	add	r5,r5,r12,ror#6
2297	eor	r12,r6,r7
2298	vshr.u32	d25,d6,#10
2299	eor	r0,r0,r6,ror#20
2300	add	r5,r5,r2
2301	veor	d25,d25,d24
2302	ldr	r2,[sp,#60]
2303	and	r3,r3,r12
2304	vshr.u32	d24,d6,#19
2305	add	r9,r9,r5
2306	add	r5,r5,r0,ror#2
2307	eor	r3,r3,r7
2308	vld1.32	{q8},[r14,:128]!
2309	add	r4,r4,r2
2310	vsli.32	d24,d6,#13
2311	eor	r2,r10,r11
2312	eor	r0,r9,r9,ror#5
2313	veor	d25,d25,d24
2314	add	r5,r5,r3
2315	and	r2,r2,r9
2316	vadd.i32	d7,d7,d25
2317	eor	r3,r0,r9,ror#19
2318	eor	r0,r5,r5,ror#11
2319	vadd.i32	q8,q8,q3
2320	eor	r2,r2,r11
2321	add	r4,r4,r3,ror#6
2322	eor	r3,r5,r6
2323	eor	r0,r0,r5,ror#20
2324	add	r4,r4,r2
2325	ldr	r2,[r14]
2326	and	r12,r12,r3
2327	add	r8,r8,r4
2328	vst1.32	{q8},[r1,:128]!
2329	add	r4,r4,r0,ror#2
2330	eor	r12,r12,r6
2331	teq	r2,#0				@ check for K256 terminator
2332	ldr	r2,[sp,#0]
2333	sub	r1,r1,#64
2334	bne	L_00_48
2335
2336	ldr	r1,[sp,#68]
2337	ldr	r0,[sp,#72]
2338	sub	r14,r14,#256	@ rewind r14
2339	teq	r1,r0
2340	it	eq
2341	subeq	r1,r1,#64		@ avoid SEGV
2342	vld1.8	{q0},[r1]!		@ load next input block
2343	vld1.8	{q1},[r1]!
2344	vld1.8	{q2},[r1]!
2345	vld1.8	{q3},[r1]!
2346	it	ne
2347	strne	r1,[sp,#68]
2348	mov	r1,sp
2349	add	r11,r11,r2
2350	eor	r2,r9,r10
2351	eor	r0,r8,r8,ror#5
2352	add	r4,r4,r12
2353	vld1.32	{q8},[r14,:128]!
2354	and	r2,r2,r8
2355	eor	r12,r0,r8,ror#19
2356	eor	r0,r4,r4,ror#11
2357	eor	r2,r2,r10
2358	vrev32.8	q0,q0
2359	add	r11,r11,r12,ror#6
2360	eor	r12,r4,r5
2361	eor	r0,r0,r4,ror#20
2362	add	r11,r11,r2
2363	vadd.i32	q8,q8,q0
2364	ldr	r2,[sp,#4]
2365	and	r3,r3,r12
2366	add	r7,r7,r11
2367	add	r11,r11,r0,ror#2
2368	eor	r3,r3,r5
2369	add	r10,r10,r2
2370	eor	r2,r8,r9
2371	eor	r0,r7,r7,ror#5
2372	add	r11,r11,r3
2373	and	r2,r2,r7
2374	eor	r3,r0,r7,ror#19
2375	eor	r0,r11,r11,ror#11
2376	eor	r2,r2,r9
2377	add	r10,r10,r3,ror#6
2378	eor	r3,r11,r4
2379	eor	r0,r0,r11,ror#20
2380	add	r10,r10,r2
2381	ldr	r2,[sp,#8]
2382	and	r12,r12,r3
2383	add	r6,r6,r10
2384	add	r10,r10,r0,ror#2
2385	eor	r12,r12,r4
2386	add	r9,r9,r2
2387	eor	r2,r7,r8
2388	eor	r0,r6,r6,ror#5
2389	add	r10,r10,r12
2390	and	r2,r2,r6
2391	eor	r12,r0,r6,ror#19
2392	eor	r0,r10,r10,ror#11
2393	eor	r2,r2,r8
2394	add	r9,r9,r12,ror#6
2395	eor	r12,r10,r11
2396	eor	r0,r0,r10,ror#20
2397	add	r9,r9,r2
2398	ldr	r2,[sp,#12]
2399	and	r3,r3,r12
2400	add	r5,r5,r9
2401	add	r9,r9,r0,ror#2
2402	eor	r3,r3,r11
2403	add	r8,r8,r2
2404	eor	r2,r6,r7
2405	eor	r0,r5,r5,ror#5
2406	add	r9,r9,r3
2407	and	r2,r2,r5
2408	eor	r3,r0,r5,ror#19
2409	eor	r0,r9,r9,ror#11
2410	eor	r2,r2,r7
2411	add	r8,r8,r3,ror#6
2412	eor	r3,r9,r10
2413	eor	r0,r0,r9,ror#20
2414	add	r8,r8,r2
2415	ldr	r2,[sp,#16]
2416	and	r12,r12,r3
2417	add	r4,r4,r8
2418	add	r8,r8,r0,ror#2
2419	eor	r12,r12,r10
2420	vst1.32	{q8},[r1,:128]!
@ --- Next four integer rounds, interleaved with NEON preprocessing of
@ --- message vector q1 (same pattern as the q0 group above: load K256
@ --- via r14, vrev32 byte-swap, vadd to form W+K, vst1 to the r1 area).
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1		@ byte-swap second quad of input words
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1	@ q8 = W[4..7] + K[4..7]
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!	@ stash W+K for the tail rounds
@ --- Next four integer rounds, interleaved with NEON preprocessing of
@ --- message vector q2 (same K-load / vrev32 / vadd / vst1 pattern).
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2		@ byte-swap third quad of input words
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2	@ q8 = W[8..11] + K[8..11]
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!	@ stash W+K for the tail rounds
@ --- Final four integer rounds of this group, interleaved with NEON
@ --- preprocessing of message vector q3.  Note the last ldr pulls
@ --- [sp,#64], which holds a pointer used by the accumulation code
@ --- that follows (presumably the hash-state pointer saved in the
@ --- prologue, outside this view -- confirm there).
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!	@ next four K256 constants
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3		@ byte-swap fourth quad of input words
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3	@ q8 = W[12..15] + K[12..15]
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]	@ r2 = saved pointer (hash state; see prologue)
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!	@ stash last W+K quad
@ --- End of block: fold working variables r4..r11 back into the eight
@ --- 32-bit hash words at [r2] (H[i] += working[i]), with loads and
@ --- adds interleaved for dual-issue.  Then either iterate on the next
@ --- input block or restore sp and return.
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4			@ write back H0..H3, advancing r2
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}	@ write back H4..H7

@ Flags were set earlier by the input-exhaustion check (outside this
@ view): NE = more input remains.  The it-blocks keep this valid in
@ Thumb-2 mode.
	ittte	ne
	movne	r1,sp			@ r1 = scratch area for next block's W+K
	ldrne	r2,[sp,#0]		@ reload first schedule word
	eorne	r12,r12,r12		@ clear deferred-Maj accumulator
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6		@ pre-seed b^c for the next round's Maj
	bne	L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}	@ restore and return
2670
2671#endif
2672#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2673
2674# if defined(__thumb2__)
2675#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2676# else
2677#  define INST(a,b,c,d)	.byte	a,b,c,d
2678# endif
2679
#ifdef __thumb2__
.thumb_func	sha256_block_data_order_armv8
#endif
.align	5
@ sha256_block_data_order_armv8(r0 = hash state, r1 = input, r2 = block count)
@ ARMv8 Crypto Extensions path.  The sha256h/sha256h2/sha256su0/sha256su1
@ instructions are emitted as raw opcodes through INST() because older
@ assemblers lack the mnemonics (the macro swaps byte order for Thumb-2).
@ NEON register roles: q0,q1 = current state (two halves of A..H);
@ q14,q15 = state saved at the top of each block; q8..q11 = the 16-word
@ message schedule; q12,q13 = alternating W+K accumulators; q2 = copy of
@ q0 needed because sha256h overwrites its destination.
@ r3 is assumed to arrive pointing #256+32 past the K256 table (set up
@ by the dispatcher outside this view -- confirm there).
sha256_block_data_order_armv8:
LARMv8:
	vld1.32	{q0,q1},[r0]	@ load the eight hash words
	sub	r3,r3,#256+32	@ rewind r3 to the start of K256
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	b	Loop_v8

.align	4
Loop_v8:
	vld1.8	{q8,q9},[r1]!	@ load one 64-byte block
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!	@ first K quad
	vrev32.8	q8,q8	@ byte-swap message words
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2	@ set flags now: EQ when input exhausted
	vld1.32	{q13},[r3]!
@ Rounds 0..47: each quad does W+K, schedule update (su0/su1), and two
@ hash steps (sha256h updates q0, sha256h2 updates q1 via the q2 copy).
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Rounds 48..63: no further schedule updates, hash steps only.
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14	@ add back the saved state
	vadd.i32	q1,q1,q15
	it	ne
	bne	Loop_v8		@ NE from the teq above: more input

	vst1.32	{q0,q1},[r0]	@ store updated hash state

	bx	lr		@ bx lr
2819
2820#endif
@ ASCII ident string: "SHA256 block transform for ARMv4/NEON/ARMv8,
@ CRYPTOGAMS by <appro@openssl.org>" (NUL-terminated).
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.align	2
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Mach-O non-lazy symbol pointer so PIC code can reach the runtime
@ capability word OPENSSL_armcap_P (read by the dispatcher, outside
@ this view) without a text relocation.
.comm	_OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol	_OPENSSL_armcap_P
.long	0
.private_extern	_OPENSSL_armcap_P
#endif
2832