// This file is generated from a similarly-named Perl script in the BoringSSL
// source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif
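// Note: MemorySanitizer cannot see initialization performed inside
// hand-written assembly, so the block above forces OPENSSL_NO_ASM under
// MSan and the plain C implementation is used instead.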

#if !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the OpenSSL license (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA256 block procedure for ARMv4. May 2007.

@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte
@ [on single-issue Xscale PXA250 core].

@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
@ Cortex A8 core and ~20 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 16%
@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.

@ September 2013.
@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was done
@ about it).

@ May 2014.
@
@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
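@
@ For reference, every round below implements the FIPS 180-4 compression
@ step. With working variables a..h (kept in r4-r11; the register roles
@ rotate from round to round instead of the values moving):
@
@	T1 = h + Sigma1(e) + Ch(e,f,g) + K256[i] + X[i]
@	T2 = Sigma0(a) + Maj(a,b,c)
@	h=g; g=f; f=e; e=d+T1; d=c; c=b; b=a; a=T1+T2
@
@ where Ch(e,f,g) = (e&f)^(~e&g), Maj(a,b,c) = (a&b)^(a&c)^(b&c),
@ Sigma0(a) = (a>>>2)^(a>>>13)^(a>>>22) and
@ Sigma1(e) = (e>>>6)^(e>>>11)^(e>>>25).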

#ifndef __KERNEL__
# include <openssl/arm_arch.h>
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
#endif

@ Silence ARMv8 deprecated IT instruction warnings. This file is used by both
@ ARMv7 and ARMv8 processors. It does have ARMv8-only code, but those
@ instructions are manually-encoded. (See unsha256.)


.text
#if defined(__thumb2__)
.syntax	unified
.thumb
#else
.code	32
#endif


.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2

.word	0				@ terminator
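@ The table above holds the 64 SHA-256 round constants of FIPS 180-4:
@ the first 32 bits of the fractional parts of the cube roots of the
@ first 64 primes. The integer loop below recognizes the final round by
@ the low byte of the last constant, 0xc67178f2 (the 0xf2 compares).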
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
LOPENSSL_armcap:
.word	OPENSSL_armcap_P-Lsha256_block_data_order
#endif
.align	5

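@ Entry point. The C-level contract (parameter names illustrative) is
@
@	void sha256_block_data_order(uint32_t state[8],
@	                             const uint8_t *in, size_t num);
@
@ r0 = state, r1 = input, r2 = number of 64-byte blocks; r2 is turned
@ into an end-of-input pointer below.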
.globl	_sha256_block_data_order
.private_extern	_sha256_block_data_order
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order
#endif
_sha256_block_data_order:
Lsha256_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ _sha256_block_data_order
#else
	adr	r3,Lsha256_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,LOPENSSL_armcap
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
#ifdef	__APPLE__
	ldr	r12,[r12]
#endif
	tst	r12,#ARMV8_SHA256
	bne	LARMv8
	tst	r12,#ARMV7_NEON
	bne	LNEON
#endif
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
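@ Frame layout while hashing (offsets from the post-alloca sp):
@	sp+ 0..63	X[0..15], the 16-word message schedule window
@	sp+64		ctx   (saved r0)
@	sp+68		inp   (saved r1, input cursor)
@	sp+72		limit (saved r2, end-of-input pointer)
@ followed by the callee-saved r4-r11 and lr pushed above.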
Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
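@ Two scheduling tricks recur in the unrolled rounds below:
@
@ 1. Maj(a,b,c) is computed as ((a^b)&(b^c))^b, with b^c ("magic")
@    carried in r3/r12 from one round into the next, and its addition
@    into h deferred: hence the "h+=Maj(a,b,c) from the past" comments.
@ 2. Sigma1(e) is formed as (e ^ (e>>>5) ^ (e>>>19)) >>> 6, folding the
@    final rotation into the "ror#6" of the add that consumes it;
@    Sigma0(a) likewise uses rotations by 11 and 20 plus a final ror#2.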
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
Lrounds_16_xx:
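@ Rounds 16..63: the schedule is extended in place in the 16-word window,
@	X[i&15] += sigma1(X[(i-2)&15]) + X[(i-7)&15] + sigma0(X[(i-15)&15])
@ with sigma0(x) = (x>>>7)^(x>>>18)^(x>>3) and
@      sigma1(x) = (x>>>17)^(x>>>19)^(x>>10),
@ interleaved with the same round function as above. The 16-round body
@ below runs three times.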
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
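@ No round counter is kept: at the end of each 16-round batch the low
@ byte of the most recently fetched K256 word is compared with 0xf2,
@ the low byte of the final constant 0xc67178f2 (the "done?" check at
@ round 31 above). No intervening instruction sets the flags, so "eq"
@ here means all 64 rounds have been processed.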
#if __ARM_ARCH__>=7
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif

#if __ARM_MAX_ARCH__>=7



.globl	_sha256_block_data_order_neon
.private_extern	_sha256_block_data_order_neon
#ifdef __thumb2__
.thumb_func	_sha256_block_data_order_neon
#endif
.align	5
.skip	16
_sha256_block_data_order_neon:
LNEON:
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}

	sub	r11,sp,#16*4+16
	adr	r14,K256
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
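@ NEON frame: sp is realigned to 16 bytes so the schedule can be kept
@ with 128-bit stores. Layout, mirroring the integer path:
@	sp+ 0..63	X[0..15]+K256[i] (four q-sized slots per batch)
@	sp+64 ctx	sp+68 inp	sp+72 limit	sp+76 original sp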

	vld1.8	{q0},[r1]!
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	vld1.32	{q8},[r14,:128]!
	vld1.32	{q9},[r14,:128]!
	vld1.32	{q10},[r14,:128]!
	vld1.32	{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str	r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str	r1,[sp,#68]
	mov	r1,sp
	vrev32.8	q2,q2
	str	r2,[sp,#72]
	vrev32.8	q3,q3
	str	r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32	{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32	{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32	{q10},[r1,:128]!
	vst1.32	{q11},[r1,:128]!
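@ The first 16 message words are now loaded, byte-reversed on
@ little-endian and pre-added with the first four K256 vectors, so the
@ scalar rounds below can fetch X[i]+K256[i] as a single word from the
@ stack instead of performing a separate K256 lookup per round.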

	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r1,r1,#64
	ldr	r2,[sp,#0]
	eor	r12,r12,r12
	eor	r3,r5,r6
	b	L_00_48

.align	4
L_00_48:
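@ Each 16-round batch below interleaves four scalar rounds with the
@ NEON computation of the next four schedule words: vext.8 gathers the
@ shifted windows, vshr/vsli pairs build the rotations of sigma0/sigma1
@ (e.g. x>>>7 as (x>>7)|(x<<25)), and the extended words are pre-added
@ with the next K256 vector and stored back to the stack.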
	vext.8	q8,q0,q1,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q2,q3,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q0,q0,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d7,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d7,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d7,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q0,q0,q9
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d7,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d7,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d0,d0,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d0,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d0,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d0,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	vshr.u32	d24,d0,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d0,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d1,d1,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q0
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
2279	eor	r3,r0,r11,ror#19
2280	eor	r0,r7,r7,ror#11
2281	vsli.32	d24,d5,#15
2282	eor	r2,r2,r5
2283	add	r6,r6,r3,ror#6
2284	vshr.u32	d25,d5,#10
2285	eor	r3,r7,r8
2286	eor	r0,r0,r7,ror#20
2287	vadd.i32	q3,q3,q9
2288	add	r6,r6,r2
2289	ldr	r2,[sp,#56]
2290	veor	d25,d25,d24
2291	and	r12,r12,r3
2292	add	r10,r10,r6
2293	vshr.u32	d24,d5,#19
2294	add	r6,r6,r0,ror#2
2295	eor	r12,r12,r8
2296	vsli.32	d24,d5,#13
2297	add	r5,r5,r2
2298	eor	r2,r11,r4
2299	veor	d25,d25,d24
2300	eor	r0,r10,r10,ror#5
2301	add	r6,r6,r12
2302	vadd.i32	d6,d6,d25
2303	and	r2,r2,r10
2304	eor	r12,r0,r10,ror#19
2305	vshr.u32	d24,d6,#17
2306	eor	r0,r6,r6,ror#11
2307	eor	r2,r2,r4
2308	vsli.32	d24,d6,#15
2309	add	r5,r5,r12,ror#6
2310	eor	r12,r6,r7
2311	vshr.u32	d25,d6,#10
2312	eor	r0,r0,r6,ror#20
2313	add	r5,r5,r2
2314	veor	d25,d25,d24
2315	ldr	r2,[sp,#60]
2316	and	r3,r3,r12
2317	vshr.u32	d24,d6,#19
2318	add	r9,r9,r5
2319	add	r5,r5,r0,ror#2
2320	eor	r3,r3,r7
2321	vld1.32	{q8},[r14,:128]!
2322	add	r4,r4,r2
2323	vsli.32	d24,d6,#13
2324	eor	r2,r10,r11
2325	eor	r0,r9,r9,ror#5
2326	veor	d25,d25,d24
2327	add	r5,r5,r3
2328	and	r2,r2,r9
2329	vadd.i32	d7,d7,d25
2330	eor	r3,r0,r9,ror#19
2331	eor	r0,r5,r5,ror#11
2332	vadd.i32	q8,q8,q3
2333	eor	r2,r2,r11
2334	add	r4,r4,r3,ror#6
2335	eor	r3,r5,r6
2336	eor	r0,r0,r5,ror#20
2337	add	r4,r4,r2
2338	ldr	r2,[r14]
2339	and	r12,r12,r3
2340	add	r8,r8,r4
2341	vst1.32	{q8},[r1,:128]!
2342	add	r4,r4,r0,ror#2
2343	eor	r12,r12,r6
2344	teq	r2,#0				@ check for K256 terminator
2345	ldr	r2,[sp,#0]
2346	sub	r1,r1,#64
2347	bne	L_00_48
2348
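@ End of the L_00_48 loop: each pass above retires 16 integer rounds
@ while NEON expands the next 16 message-schedule words four at a time
@ (sigma0/sigma1 built from vshr/vsli/veor), adds the K256 constants
@ streamed in through r14, and parks W+K on the stack at r1 for the
@ integer rounds to consume. The zero word after K256 terminates it.
@ Next, decide whether another input block follows; judging from the
@ loads below, [sp,#68] holds the input pointer and [sp,#72] the
@ end-of-input pointer, as set up by the prologue outside this excerpt.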
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14 to K256
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV: re-read last block on the final pass
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
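@ Final 16 rounds of this block: the integer code drains the W+K values
@ already on the stack while NEON byte-swaps the speculatively loaded
@ next block (vrev32.8) and precomputes its first W+K vectors.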
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
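@ All 64 rounds done: fold the working variables a..h (r4-r11) back into
@ the hash state. r2 received the context pointer from [sp,#64] in the
@ last round group above.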
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}
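@ If input remains (NE still holds from the teq r1,r2 above; none of the
@ intervening instructions set flags), point r1 back at the stack W+K
@ area, reload W[0]+K[0], zero r12 and seed r3 with b^c for the next
@ Maj, then loop; otherwise restore the caller's sp and return below.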

	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}

#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
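@ INST emits the raw encoding of the ARMv8 SHA-256 instructions so the
@ file assembles even where the assembler cannot name them. In Thumb-2
@ the two halfwords of a 32-bit NEON encoding are emitted leading
@ halfword first and the ARM-mode 0xf3 prefix byte becomes 0xff, hence
@ the reordered bytes and the d|0xc fix-up.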

#ifdef __thumb2__
.thumb_func	sha256_block_data_order_armv8
#endif
.align	5
sha256_block_data_order_armv8:
LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ r2 = inp + len*64 (end of input)
	b	Loop_v8
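@ Loop_v8 compresses one 64-byte block per pass with the hardware
@ SHA-256 extension: q0/q1 carry the state, q8-q11 the message
@ schedule, r3 walks K256; each sha256h/sha256h2 pair retires four
@ rounds and sha256su0/sha256su1 extend the schedule.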

.align	4
Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
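@ Rounds 48-63: the message schedule is complete, so only the
@ sha256h/sha256h2 compression steps remain.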
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind r3 to K256
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ return

#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
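@ The .byte string above decodes to:
@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"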
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	_OPENSSL_armcap_P,4
.non_lazy_symbol_pointer
OPENSSL_armcap_P:
.indirect_symbol	_OPENSSL_armcap_P
.long	0
.private_extern	_OPENSSL_armcap_P
#endif
#endif  // !OPENSSL_NO_ASM
