1// This file is generated from a similarly-named Perl script in the BoringSSL
2// source tree. Do not edit by hand.
3
4#if !defined(__has_feature)
5#define __has_feature(x) 0
6#endif
7#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
8#define OPENSSL_NO_ASM
9#endif
10
11#if !defined(OPENSSL_NO_ASM)
12#if defined(__arm__)
13#if defined(BORINGSSL_PREFIX)
14#include <boringssl_prefix_symbols_asm.h>
15#endif
16@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
17@
18@ Licensed under the OpenSSL license (the "License").  You may not use
19@ this file except in compliance with the License.  You can obtain a copy
20@ in the file LICENSE in the source distribution or at
21@ https://www.openssl.org/source/license.html
22
23
24@ ====================================================================
25@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
26@ project. The module is, however, dual licensed under OpenSSL and
27@ CRYPTOGAMS licenses depending on where you obtain it. For further
28@ details see http://www.openssl.org/~appro/cryptogams/.
29@
30@ Permission to use under GPL terms is granted.
31@ ====================================================================
32
33@ SHA256 block procedure for ARMv4. May 2007.
34
@ Performance is ~2x better than gcc 3.4 generated code and in
@ "absolute" terms is ~2250 cycles per 64-byte block or ~35 cycles per
@ byte [on single-issue XScale PXA250 core].
38
39@ July 2010.
40@
41@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
42@ Cortex A8 core and ~20 cycles per processed byte.
43
44@ February 2011.
45@
46@ Profiler-assisted and platform-specific optimization resulted in 16%
47@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
48
49@ September 2013.
50@
@ Add NEON implementation. On Cortex A8 it was measured to process one
@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
@ code (meaning that the latter performs sub-optimally; nothing was
@ done about it).
56
57@ May 2014.
58@
59@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
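
@ The scalar round code below implements the FIPS 180-4 compression
@ function with the rotations factored, so each Sigma costs two eor
@ plus one ror folded into the final add:
@   Sigma1(e) = ROR(e^ROR(e,5)^ROR(e,19),6)  = ROR(e,6)^ROR(e,11)^ROR(e,25)
@   Sigma0(a) = ROR(a^ROR(a,11)^ROR(a,20),2) = ROR(a,2)^ROR(a,13)^ROR(a,22)
@   Ch(e,f,g)  = ((f^g)&e)^g
@   Maj(a,b,c) = ((a^b)&(b^c))^b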
60
61#ifndef __KERNEL__
62# include <openssl/arm_arch.h>
63#else
64# define __ARM_ARCH__ __LINUX_ARM_ARCH__
65# define __ARM_MAX_ARCH__ 7
66#endif
67
@ Silence ARMv8 deprecated IT instruction warnings. This file is used by
@ both ARMv7 and ARMv8 processors. It does have ARMv8-only code, but
@ those instructions are manually encoded (see the unsha256 sub in the
@ generating Perl script).
71.arch	armv7-a
72
73.text
74#if defined(__thumb2__)
75.syntax	unified
76.thumb
77#else
78.code	32
79#endif
80
81.type	K256,%object
82.align	5
83K256:
84.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
85.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
86.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
87.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
88.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
89.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
90.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
91.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
92.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
93.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
94.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
95.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
96.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
97.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
98.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
99.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
100.size	K256,.-K256
101.word	0				@ terminator
102#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
103.LOPENSSL_armcap:
104.word	OPENSSL_armcap_P-.Lsha256_block_data_order
105#endif
106.align	5
107
108.globl	sha256_block_data_order
109.hidden	sha256_block_data_order
110.type	sha256_block_data_order,%function
111sha256_block_data_order:
112.Lsha256_block_data_order:
113#if __ARM_ARCH__<7 && !defined(__thumb2__)
114	sub	r3,pc,#8		@ sha256_block_data_order
115#else
116	adr	r3,.Lsha256_block_data_order
117#endif
118#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
119	ldr	r12,.LOPENSSL_armcap
120	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
121#ifdef	__APPLE__
122	ldr	r12,[r12]
123#endif
124	tst	r12,#ARMV8_SHA256
125	bne	.LARMv8
126	tst	r12,#ARMV7_NEON
127	bne	.LNEON
128#endif
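@ When built with __ARM_MAX_ARCH__>=7 outside the kernel, the code above
@ adds the link-time offset stored at .LOPENSSL_armcap to the address of
@ .Lsha256_block_data_order to locate OPENSSL_armcap_P position-
@ independently (one extra indirection on Apple); ARMV8_SHA256 selects
@ the hardware SHA-256 path and ARMV7_NEON the NEON path, otherwise
@ execution falls through to the integer-only code below.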
129	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
130	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
131	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
132	sub	r14,r3,#256+32	@ K256
133	sub	sp,sp,#16*4		@ alloca(X[16])
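@ Frame layout for the integer code: r4-r11 hold the working variables
@ a-h, r14 walks K256, X[0..15] live in the 16 words just allocated on
@ the stack, and the pushed ctx, inp and end-of-input pointers sit at
@ [sp,#16*4], [sp,#17*4] and [sp,#18*4].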
134.Loop:
135# if __ARM_ARCH__>=7
136	ldr	r2,[r1],#4
137# else
138	ldrb	r2,[r1,#3]
139# endif
140	eor	r3,r5,r6		@ magic
141	eor	r12,r12,r12
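@ The "magic" eor seeds the cross-round Maj bookkeeping: r3 and r12
@ alternate holding b^c for the next round, and h+=Maj(a,b,c) is
@ deferred into the next round's "from the past" add; r12 was just
@ zeroed so the very first deferred add is a no-op.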
142#if __ARM_ARCH__>=7
143	@ ldr	r2,[r1],#4			@ 0
144# if 0==15
145	str	r1,[sp,#17*4]			@ make room for r1
146# endif
147	eor	r0,r8,r8,ror#5
148	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
149	eor	r0,r0,r8,ror#19	@ Sigma1(e)
150# ifndef __ARMEB__
151	rev	r2,r2
152# endif
153#else
154	@ ldrb	r2,[r1,#3]			@ 0
155	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
156	ldrb	r12,[r1,#2]
157	ldrb	r0,[r1,#1]
158	orr	r2,r2,r12,lsl#8
159	ldrb	r12,[r1],#4
160	orr	r2,r2,r0,lsl#16
161# if 0==15
162	str	r1,[sp,#17*4]			@ make room for r1
163# endif
164	eor	r0,r8,r8,ror#5
165	orr	r2,r2,r12,lsl#24
166	eor	r0,r0,r8,ror#19	@ Sigma1(e)
167#endif
168	ldr	r12,[r14],#4			@ *K256++
169	add	r11,r11,r2			@ h+=X[i]
170	str	r2,[sp,#0*4]
171	eor	r2,r9,r10
172	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
173	and	r2,r2,r8
174	add	r11,r11,r12			@ h+=K256[i]
175	eor	r2,r2,r10			@ Ch(e,f,g)
176	eor	r0,r4,r4,ror#11
177	add	r11,r11,r2			@ h+=Ch(e,f,g)
178#if 0==31
179	and	r12,r12,#0xff
180	cmp	r12,#0xf2			@ done?
181#endif
182#if 0<15
183# if __ARM_ARCH__>=7
184	ldr	r2,[r1],#4			@ prefetch
185# else
186	ldrb	r2,[r1,#3]
187# endif
188	eor	r12,r4,r5			@ a^b, b^c in next round
189#else
190	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
191	eor	r12,r4,r5			@ a^b, b^c in next round
192	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
193#endif
194	eor	r0,r0,r4,ror#20	@ Sigma0(a)
195	and	r3,r3,r12			@ (b^c)&=(a^b)
196	add	r7,r7,r11			@ d+=h
197	eor	r3,r3,r5			@ Maj(a,b,c)
198	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
199	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
200#if __ARM_ARCH__>=7
201	@ ldr	r2,[r1],#4			@ 1
202# if 1==15
203	str	r1,[sp,#17*4]			@ make room for r1
204# endif
205	eor	r0,r7,r7,ror#5
206	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
207	eor	r0,r0,r7,ror#19	@ Sigma1(e)
208# ifndef __ARMEB__
209	rev	r2,r2
210# endif
211#else
212	@ ldrb	r2,[r1,#3]			@ 1
213	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
214	ldrb	r3,[r1,#2]
215	ldrb	r0,[r1,#1]
216	orr	r2,r2,r3,lsl#8
217	ldrb	r3,[r1],#4
218	orr	r2,r2,r0,lsl#16
219# if 1==15
220	str	r1,[sp,#17*4]			@ make room for r1
221# endif
222	eor	r0,r7,r7,ror#5
223	orr	r2,r2,r3,lsl#24
224	eor	r0,r0,r7,ror#19	@ Sigma1(e)
225#endif
226	ldr	r3,[r14],#4			@ *K256++
227	add	r10,r10,r2			@ h+=X[i]
228	str	r2,[sp,#1*4]
229	eor	r2,r8,r9
230	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
231	and	r2,r2,r7
232	add	r10,r10,r3			@ h+=K256[i]
233	eor	r2,r2,r9			@ Ch(e,f,g)
234	eor	r0,r11,r11,ror#11
235	add	r10,r10,r2			@ h+=Ch(e,f,g)
236#if 1==31
237	and	r3,r3,#0xff
238	cmp	r3,#0xf2			@ done?
239#endif
240#if 1<15
241# if __ARM_ARCH__>=7
242	ldr	r2,[r1],#4			@ prefetch
243# else
244	ldrb	r2,[r1,#3]
245# endif
246	eor	r3,r11,r4			@ a^b, b^c in next round
247#else
248	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
249	eor	r3,r11,r4			@ a^b, b^c in next round
250	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
251#endif
252	eor	r0,r0,r11,ror#20	@ Sigma0(a)
253	and	r12,r12,r3			@ (b^c)&=(a^b)
254	add	r6,r6,r10			@ d+=h
255	eor	r12,r12,r4			@ Maj(a,b,c)
256	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
257	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
258#if __ARM_ARCH__>=7
259	@ ldr	r2,[r1],#4			@ 2
260# if 2==15
261	str	r1,[sp,#17*4]			@ make room for r1
262# endif
263	eor	r0,r6,r6,ror#5
264	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
265	eor	r0,r0,r6,ror#19	@ Sigma1(e)
266# ifndef __ARMEB__
267	rev	r2,r2
268# endif
269#else
270	@ ldrb	r2,[r1,#3]			@ 2
271	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
272	ldrb	r12,[r1,#2]
273	ldrb	r0,[r1,#1]
274	orr	r2,r2,r12,lsl#8
275	ldrb	r12,[r1],#4
276	orr	r2,r2,r0,lsl#16
277# if 2==15
278	str	r1,[sp,#17*4]			@ make room for r1
279# endif
280	eor	r0,r6,r6,ror#5
281	orr	r2,r2,r12,lsl#24
282	eor	r0,r0,r6,ror#19	@ Sigma1(e)
283#endif
284	ldr	r12,[r14],#4			@ *K256++
285	add	r9,r9,r2			@ h+=X[i]
286	str	r2,[sp,#2*4]
287	eor	r2,r7,r8
288	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
289	and	r2,r2,r6
290	add	r9,r9,r12			@ h+=K256[i]
291	eor	r2,r2,r8			@ Ch(e,f,g)
292	eor	r0,r10,r10,ror#11
293	add	r9,r9,r2			@ h+=Ch(e,f,g)
294#if 2==31
295	and	r12,r12,#0xff
296	cmp	r12,#0xf2			@ done?
297#endif
298#if 2<15
299# if __ARM_ARCH__>=7
300	ldr	r2,[r1],#4			@ prefetch
301# else
302	ldrb	r2,[r1,#3]
303# endif
304	eor	r12,r10,r11			@ a^b, b^c in next round
305#else
306	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
307	eor	r12,r10,r11			@ a^b, b^c in next round
308	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
309#endif
310	eor	r0,r0,r10,ror#20	@ Sigma0(a)
311	and	r3,r3,r12			@ (b^c)&=(a^b)
312	add	r5,r5,r9			@ d+=h
313	eor	r3,r3,r11			@ Maj(a,b,c)
314	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
315	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
316#if __ARM_ARCH__>=7
317	@ ldr	r2,[r1],#4			@ 3
318# if 3==15
319	str	r1,[sp,#17*4]			@ make room for r1
320# endif
321	eor	r0,r5,r5,ror#5
322	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
323	eor	r0,r0,r5,ror#19	@ Sigma1(e)
324# ifndef __ARMEB__
325	rev	r2,r2
326# endif
327#else
328	@ ldrb	r2,[r1,#3]			@ 3
329	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
330	ldrb	r3,[r1,#2]
331	ldrb	r0,[r1,#1]
332	orr	r2,r2,r3,lsl#8
333	ldrb	r3,[r1],#4
334	orr	r2,r2,r0,lsl#16
335# if 3==15
336	str	r1,[sp,#17*4]			@ make room for r1
337# endif
338	eor	r0,r5,r5,ror#5
339	orr	r2,r2,r3,lsl#24
340	eor	r0,r0,r5,ror#19	@ Sigma1(e)
341#endif
342	ldr	r3,[r14],#4			@ *K256++
343	add	r8,r8,r2			@ h+=X[i]
344	str	r2,[sp,#3*4]
345	eor	r2,r6,r7
346	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
347	and	r2,r2,r5
348	add	r8,r8,r3			@ h+=K256[i]
349	eor	r2,r2,r7			@ Ch(e,f,g)
350	eor	r0,r9,r9,ror#11
351	add	r8,r8,r2			@ h+=Ch(e,f,g)
352#if 3==31
353	and	r3,r3,#0xff
354	cmp	r3,#0xf2			@ done?
355#endif
356#if 3<15
357# if __ARM_ARCH__>=7
358	ldr	r2,[r1],#4			@ prefetch
359# else
360	ldrb	r2,[r1,#3]
361# endif
362	eor	r3,r9,r10			@ a^b, b^c in next round
363#else
364	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
365	eor	r3,r9,r10			@ a^b, b^c in next round
366	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
367#endif
368	eor	r0,r0,r9,ror#20	@ Sigma0(a)
369	and	r12,r12,r3			@ (b^c)&=(a^b)
370	add	r4,r4,r8			@ d+=h
371	eor	r12,r12,r10			@ Maj(a,b,c)
372	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
373	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
374#if __ARM_ARCH__>=7
375	@ ldr	r2,[r1],#4			@ 4
376# if 4==15
377	str	r1,[sp,#17*4]			@ make room for r1
378# endif
379	eor	r0,r4,r4,ror#5
380	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
381	eor	r0,r0,r4,ror#19	@ Sigma1(e)
382# ifndef __ARMEB__
383	rev	r2,r2
384# endif
385#else
386	@ ldrb	r2,[r1,#3]			@ 4
387	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
388	ldrb	r12,[r1,#2]
389	ldrb	r0,[r1,#1]
390	orr	r2,r2,r12,lsl#8
391	ldrb	r12,[r1],#4
392	orr	r2,r2,r0,lsl#16
393# if 4==15
394	str	r1,[sp,#17*4]			@ make room for r1
395# endif
396	eor	r0,r4,r4,ror#5
397	orr	r2,r2,r12,lsl#24
398	eor	r0,r0,r4,ror#19	@ Sigma1(e)
399#endif
400	ldr	r12,[r14],#4			@ *K256++
401	add	r7,r7,r2			@ h+=X[i]
402	str	r2,[sp,#4*4]
403	eor	r2,r5,r6
404	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
405	and	r2,r2,r4
406	add	r7,r7,r12			@ h+=K256[i]
407	eor	r2,r2,r6			@ Ch(e,f,g)
408	eor	r0,r8,r8,ror#11
409	add	r7,r7,r2			@ h+=Ch(e,f,g)
410#if 4==31
411	and	r12,r12,#0xff
412	cmp	r12,#0xf2			@ done?
413#endif
414#if 4<15
415# if __ARM_ARCH__>=7
416	ldr	r2,[r1],#4			@ prefetch
417# else
418	ldrb	r2,[r1,#3]
419# endif
420	eor	r12,r8,r9			@ a^b, b^c in next round
421#else
422	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
423	eor	r12,r8,r9			@ a^b, b^c in next round
424	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
425#endif
426	eor	r0,r0,r8,ror#20	@ Sigma0(a)
427	and	r3,r3,r12			@ (b^c)&=(a^b)
428	add	r11,r11,r7			@ d+=h
429	eor	r3,r3,r9			@ Maj(a,b,c)
430	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
431	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
432#if __ARM_ARCH__>=7
433	@ ldr	r2,[r1],#4			@ 5
434# if 5==15
435	str	r1,[sp,#17*4]			@ make room for r1
436# endif
437	eor	r0,r11,r11,ror#5
438	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
439	eor	r0,r0,r11,ror#19	@ Sigma1(e)
440# ifndef __ARMEB__
441	rev	r2,r2
442# endif
443#else
444	@ ldrb	r2,[r1,#3]			@ 5
445	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
446	ldrb	r3,[r1,#2]
447	ldrb	r0,[r1,#1]
448	orr	r2,r2,r3,lsl#8
449	ldrb	r3,[r1],#4
450	orr	r2,r2,r0,lsl#16
451# if 5==15
452	str	r1,[sp,#17*4]			@ make room for r1
453# endif
454	eor	r0,r11,r11,ror#5
455	orr	r2,r2,r3,lsl#24
456	eor	r0,r0,r11,ror#19	@ Sigma1(e)
457#endif
458	ldr	r3,[r14],#4			@ *K256++
459	add	r6,r6,r2			@ h+=X[i]
460	str	r2,[sp,#5*4]
461	eor	r2,r4,r5
462	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
463	and	r2,r2,r11
464	add	r6,r6,r3			@ h+=K256[i]
465	eor	r2,r2,r5			@ Ch(e,f,g)
466	eor	r0,r7,r7,ror#11
467	add	r6,r6,r2			@ h+=Ch(e,f,g)
468#if 5==31
469	and	r3,r3,#0xff
470	cmp	r3,#0xf2			@ done?
471#endif
472#if 5<15
473# if __ARM_ARCH__>=7
474	ldr	r2,[r1],#4			@ prefetch
475# else
476	ldrb	r2,[r1,#3]
477# endif
478	eor	r3,r7,r8			@ a^b, b^c in next round
479#else
480	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
481	eor	r3,r7,r8			@ a^b, b^c in next round
482	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
483#endif
484	eor	r0,r0,r7,ror#20	@ Sigma0(a)
485	and	r12,r12,r3			@ (b^c)&=(a^b)
486	add	r10,r10,r6			@ d+=h
487	eor	r12,r12,r8			@ Maj(a,b,c)
488	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
489	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
490#if __ARM_ARCH__>=7
491	@ ldr	r2,[r1],#4			@ 6
492# if 6==15
493	str	r1,[sp,#17*4]			@ make room for r1
494# endif
495	eor	r0,r10,r10,ror#5
496	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
497	eor	r0,r0,r10,ror#19	@ Sigma1(e)
498# ifndef __ARMEB__
499	rev	r2,r2
500# endif
501#else
502	@ ldrb	r2,[r1,#3]			@ 6
503	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
504	ldrb	r12,[r1,#2]
505	ldrb	r0,[r1,#1]
506	orr	r2,r2,r12,lsl#8
507	ldrb	r12,[r1],#4
508	orr	r2,r2,r0,lsl#16
509# if 6==15
510	str	r1,[sp,#17*4]			@ make room for r1
511# endif
512	eor	r0,r10,r10,ror#5
513	orr	r2,r2,r12,lsl#24
514	eor	r0,r0,r10,ror#19	@ Sigma1(e)
515#endif
516	ldr	r12,[r14],#4			@ *K256++
517	add	r5,r5,r2			@ h+=X[i]
518	str	r2,[sp,#6*4]
519	eor	r2,r11,r4
520	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
521	and	r2,r2,r10
522	add	r5,r5,r12			@ h+=K256[i]
523	eor	r2,r2,r4			@ Ch(e,f,g)
524	eor	r0,r6,r6,ror#11
525	add	r5,r5,r2			@ h+=Ch(e,f,g)
526#if 6==31
527	and	r12,r12,#0xff
528	cmp	r12,#0xf2			@ done?
529#endif
530#if 6<15
531# if __ARM_ARCH__>=7
532	ldr	r2,[r1],#4			@ prefetch
533# else
534	ldrb	r2,[r1,#3]
535# endif
536	eor	r12,r6,r7			@ a^b, b^c in next round
537#else
538	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
539	eor	r12,r6,r7			@ a^b, b^c in next round
540	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
541#endif
542	eor	r0,r0,r6,ror#20	@ Sigma0(a)
543	and	r3,r3,r12			@ (b^c)&=(a^b)
544	add	r9,r9,r5			@ d+=h
545	eor	r3,r3,r7			@ Maj(a,b,c)
546	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
547	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
548#if __ARM_ARCH__>=7
549	@ ldr	r2,[r1],#4			@ 7
550# if 7==15
551	str	r1,[sp,#17*4]			@ make room for r1
552# endif
553	eor	r0,r9,r9,ror#5
554	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
555	eor	r0,r0,r9,ror#19	@ Sigma1(e)
556# ifndef __ARMEB__
557	rev	r2,r2
558# endif
559#else
560	@ ldrb	r2,[r1,#3]			@ 7
561	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
562	ldrb	r3,[r1,#2]
563	ldrb	r0,[r1,#1]
564	orr	r2,r2,r3,lsl#8
565	ldrb	r3,[r1],#4
566	orr	r2,r2,r0,lsl#16
567# if 7==15
568	str	r1,[sp,#17*4]			@ make room for r1
569# endif
570	eor	r0,r9,r9,ror#5
571	orr	r2,r2,r3,lsl#24
572	eor	r0,r0,r9,ror#19	@ Sigma1(e)
573#endif
574	ldr	r3,[r14],#4			@ *K256++
575	add	r4,r4,r2			@ h+=X[i]
576	str	r2,[sp,#7*4]
577	eor	r2,r10,r11
578	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
579	and	r2,r2,r9
580	add	r4,r4,r3			@ h+=K256[i]
581	eor	r2,r2,r11			@ Ch(e,f,g)
582	eor	r0,r5,r5,ror#11
583	add	r4,r4,r2			@ h+=Ch(e,f,g)
584#if 7==31
585	and	r3,r3,#0xff
586	cmp	r3,#0xf2			@ done?
587#endif
588#if 7<15
589# if __ARM_ARCH__>=7
590	ldr	r2,[r1],#4			@ prefetch
591# else
592	ldrb	r2,[r1,#3]
593# endif
594	eor	r3,r5,r6			@ a^b, b^c in next round
595#else
596	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
597	eor	r3,r5,r6			@ a^b, b^c in next round
598	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
599#endif
600	eor	r0,r0,r5,ror#20	@ Sigma0(a)
601	and	r12,r12,r3			@ (b^c)&=(a^b)
602	add	r8,r8,r4			@ d+=h
603	eor	r12,r12,r6			@ Maj(a,b,c)
604	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
605	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
606#if __ARM_ARCH__>=7
607	@ ldr	r2,[r1],#4			@ 8
608# if 8==15
609	str	r1,[sp,#17*4]			@ make room for r1
610# endif
611	eor	r0,r8,r8,ror#5
612	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
613	eor	r0,r0,r8,ror#19	@ Sigma1(e)
614# ifndef __ARMEB__
615	rev	r2,r2
616# endif
617#else
618	@ ldrb	r2,[r1,#3]			@ 8
619	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
620	ldrb	r12,[r1,#2]
621	ldrb	r0,[r1,#1]
622	orr	r2,r2,r12,lsl#8
623	ldrb	r12,[r1],#4
624	orr	r2,r2,r0,lsl#16
625# if 8==15
626	str	r1,[sp,#17*4]			@ make room for r1
627# endif
628	eor	r0,r8,r8,ror#5
629	orr	r2,r2,r12,lsl#24
630	eor	r0,r0,r8,ror#19	@ Sigma1(e)
631#endif
632	ldr	r12,[r14],#4			@ *K256++
633	add	r11,r11,r2			@ h+=X[i]
634	str	r2,[sp,#8*4]
635	eor	r2,r9,r10
636	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
637	and	r2,r2,r8
638	add	r11,r11,r12			@ h+=K256[i]
639	eor	r2,r2,r10			@ Ch(e,f,g)
640	eor	r0,r4,r4,ror#11
641	add	r11,r11,r2			@ h+=Ch(e,f,g)
642#if 8==31
643	and	r12,r12,#0xff
644	cmp	r12,#0xf2			@ done?
645#endif
646#if 8<15
647# if __ARM_ARCH__>=7
648	ldr	r2,[r1],#4			@ prefetch
649# else
650	ldrb	r2,[r1,#3]
651# endif
652	eor	r12,r4,r5			@ a^b, b^c in next round
653#else
654	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
655	eor	r12,r4,r5			@ a^b, b^c in next round
656	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
657#endif
658	eor	r0,r0,r4,ror#20	@ Sigma0(a)
659	and	r3,r3,r12			@ (b^c)&=(a^b)
660	add	r7,r7,r11			@ d+=h
661	eor	r3,r3,r5			@ Maj(a,b,c)
662	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
663	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
664#if __ARM_ARCH__>=7
665	@ ldr	r2,[r1],#4			@ 9
666# if 9==15
667	str	r1,[sp,#17*4]			@ make room for r1
668# endif
669	eor	r0,r7,r7,ror#5
670	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
671	eor	r0,r0,r7,ror#19	@ Sigma1(e)
672# ifndef __ARMEB__
673	rev	r2,r2
674# endif
675#else
676	@ ldrb	r2,[r1,#3]			@ 9
677	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
678	ldrb	r3,[r1,#2]
679	ldrb	r0,[r1,#1]
680	orr	r2,r2,r3,lsl#8
681	ldrb	r3,[r1],#4
682	orr	r2,r2,r0,lsl#16
683# if 9==15
684	str	r1,[sp,#17*4]			@ make room for r1
685# endif
686	eor	r0,r7,r7,ror#5
687	orr	r2,r2,r3,lsl#24
688	eor	r0,r0,r7,ror#19	@ Sigma1(e)
689#endif
690	ldr	r3,[r14],#4			@ *K256++
691	add	r10,r10,r2			@ h+=X[i]
692	str	r2,[sp,#9*4]
693	eor	r2,r8,r9
694	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
695	and	r2,r2,r7
696	add	r10,r10,r3			@ h+=K256[i]
697	eor	r2,r2,r9			@ Ch(e,f,g)
698	eor	r0,r11,r11,ror#11
699	add	r10,r10,r2			@ h+=Ch(e,f,g)
700#if 9==31
701	and	r3,r3,#0xff
702	cmp	r3,#0xf2			@ done?
703#endif
704#if 9<15
705# if __ARM_ARCH__>=7
706	ldr	r2,[r1],#4			@ prefetch
707# else
708	ldrb	r2,[r1,#3]
709# endif
710	eor	r3,r11,r4			@ a^b, b^c in next round
711#else
712	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
713	eor	r3,r11,r4			@ a^b, b^c in next round
714	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
715#endif
716	eor	r0,r0,r11,ror#20	@ Sigma0(a)
717	and	r12,r12,r3			@ (b^c)&=(a^b)
718	add	r6,r6,r10			@ d+=h
719	eor	r12,r12,r4			@ Maj(a,b,c)
720	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
721	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
722#if __ARM_ARCH__>=7
723	@ ldr	r2,[r1],#4			@ 10
724# if 10==15
725	str	r1,[sp,#17*4]			@ make room for r1
726# endif
727	eor	r0,r6,r6,ror#5
728	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
729	eor	r0,r0,r6,ror#19	@ Sigma1(e)
730# ifndef __ARMEB__
731	rev	r2,r2
732# endif
733#else
734	@ ldrb	r2,[r1,#3]			@ 10
735	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
736	ldrb	r12,[r1,#2]
737	ldrb	r0,[r1,#1]
738	orr	r2,r2,r12,lsl#8
739	ldrb	r12,[r1],#4
740	orr	r2,r2,r0,lsl#16
741# if 10==15
742	str	r1,[sp,#17*4]			@ make room for r1
743# endif
744	eor	r0,r6,r6,ror#5
745	orr	r2,r2,r12,lsl#24
746	eor	r0,r0,r6,ror#19	@ Sigma1(e)
747#endif
748	ldr	r12,[r14],#4			@ *K256++
749	add	r9,r9,r2			@ h+=X[i]
750	str	r2,[sp,#10*4]
751	eor	r2,r7,r8
752	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
753	and	r2,r2,r6
754	add	r9,r9,r12			@ h+=K256[i]
755	eor	r2,r2,r8			@ Ch(e,f,g)
756	eor	r0,r10,r10,ror#11
757	add	r9,r9,r2			@ h+=Ch(e,f,g)
758#if 10==31
759	and	r12,r12,#0xff
760	cmp	r12,#0xf2			@ done?
761#endif
762#if 10<15
763# if __ARM_ARCH__>=7
764	ldr	r2,[r1],#4			@ prefetch
765# else
766	ldrb	r2,[r1,#3]
767# endif
768	eor	r12,r10,r11			@ a^b, b^c in next round
769#else
770	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
771	eor	r12,r10,r11			@ a^b, b^c in next round
772	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
773#endif
774	eor	r0,r0,r10,ror#20	@ Sigma0(a)
775	and	r3,r3,r12			@ (b^c)&=(a^b)
776	add	r5,r5,r9			@ d+=h
777	eor	r3,r3,r11			@ Maj(a,b,c)
778	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
779	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
780#if __ARM_ARCH__>=7
781	@ ldr	r2,[r1],#4			@ 11
782# if 11==15
783	str	r1,[sp,#17*4]			@ make room for r1
784# endif
785	eor	r0,r5,r5,ror#5
786	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
787	eor	r0,r0,r5,ror#19	@ Sigma1(e)
788# ifndef __ARMEB__
789	rev	r2,r2
790# endif
791#else
792	@ ldrb	r2,[r1,#3]			@ 11
793	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
794	ldrb	r3,[r1,#2]
795	ldrb	r0,[r1,#1]
796	orr	r2,r2,r3,lsl#8
797	ldrb	r3,[r1],#4
798	orr	r2,r2,r0,lsl#16
799# if 11==15
800	str	r1,[sp,#17*4]			@ make room for r1
801# endif
802	eor	r0,r5,r5,ror#5
803	orr	r2,r2,r3,lsl#24
804	eor	r0,r0,r5,ror#19	@ Sigma1(e)
805#endif
806	ldr	r3,[r14],#4			@ *K256++
807	add	r8,r8,r2			@ h+=X[i]
808	str	r2,[sp,#11*4]
809	eor	r2,r6,r7
810	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
811	and	r2,r2,r5
812	add	r8,r8,r3			@ h+=K256[i]
813	eor	r2,r2,r7			@ Ch(e,f,g)
814	eor	r0,r9,r9,ror#11
815	add	r8,r8,r2			@ h+=Ch(e,f,g)
816#if 11==31
817	and	r3,r3,#0xff
818	cmp	r3,#0xf2			@ done?
819#endif
820#if 11<15
821# if __ARM_ARCH__>=7
822	ldr	r2,[r1],#4			@ prefetch
823# else
824	ldrb	r2,[r1,#3]
825# endif
826	eor	r3,r9,r10			@ a^b, b^c in next round
827#else
828	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
829	eor	r3,r9,r10			@ a^b, b^c in next round
830	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
831#endif
832	eor	r0,r0,r9,ror#20	@ Sigma0(a)
833	and	r12,r12,r3			@ (b^c)&=(a^b)
834	add	r4,r4,r8			@ d+=h
835	eor	r12,r12,r10			@ Maj(a,b,c)
836	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
837	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
838#if __ARM_ARCH__>=7
839	@ ldr	r2,[r1],#4			@ 12
840# if 12==15
841	str	r1,[sp,#17*4]			@ make room for r1
842# endif
843	eor	r0,r4,r4,ror#5
844	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
845	eor	r0,r0,r4,ror#19	@ Sigma1(e)
846# ifndef __ARMEB__
847	rev	r2,r2
848# endif
849#else
850	@ ldrb	r2,[r1,#3]			@ 12
851	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
852	ldrb	r12,[r1,#2]
853	ldrb	r0,[r1,#1]
854	orr	r2,r2,r12,lsl#8
855	ldrb	r12,[r1],#4
856	orr	r2,r2,r0,lsl#16
857# if 12==15
858	str	r1,[sp,#17*4]			@ make room for r1
859# endif
860	eor	r0,r4,r4,ror#5
861	orr	r2,r2,r12,lsl#24
862	eor	r0,r0,r4,ror#19	@ Sigma1(e)
863#endif
864	ldr	r12,[r14],#4			@ *K256++
865	add	r7,r7,r2			@ h+=X[i]
866	str	r2,[sp,#12*4]
867	eor	r2,r5,r6
868	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
869	and	r2,r2,r4
870	add	r7,r7,r12			@ h+=K256[i]
871	eor	r2,r2,r6			@ Ch(e,f,g)
872	eor	r0,r8,r8,ror#11
873	add	r7,r7,r2			@ h+=Ch(e,f,g)
874#if 12==31
875	and	r12,r12,#0xff
876	cmp	r12,#0xf2			@ done?
877#endif
878#if 12<15
879# if __ARM_ARCH__>=7
880	ldr	r2,[r1],#4			@ prefetch
881# else
882	ldrb	r2,[r1,#3]
883# endif
884	eor	r12,r8,r9			@ a^b, b^c in next round
885#else
886	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
887	eor	r12,r8,r9			@ a^b, b^c in next round
888	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
889#endif
890	eor	r0,r0,r8,ror#20	@ Sigma0(a)
891	and	r3,r3,r12			@ (b^c)&=(a^b)
892	add	r11,r11,r7			@ d+=h
893	eor	r3,r3,r9			@ Maj(a,b,c)
894	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
895	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
896#if __ARM_ARCH__>=7
897	@ ldr	r2,[r1],#4			@ 13
898# if 13==15
899	str	r1,[sp,#17*4]			@ make room for r1
900# endif
901	eor	r0,r11,r11,ror#5
902	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
903	eor	r0,r0,r11,ror#19	@ Sigma1(e)
904# ifndef __ARMEB__
905	rev	r2,r2
906# endif
907#else
908	@ ldrb	r2,[r1,#3]			@ 13
909	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
910	ldrb	r3,[r1,#2]
911	ldrb	r0,[r1,#1]
912	orr	r2,r2,r3,lsl#8
913	ldrb	r3,[r1],#4
914	orr	r2,r2,r0,lsl#16
915# if 13==15
916	str	r1,[sp,#17*4]			@ make room for r1
917# endif
918	eor	r0,r11,r11,ror#5
919	orr	r2,r2,r3,lsl#24
920	eor	r0,r0,r11,ror#19	@ Sigma1(e)
921#endif
922	ldr	r3,[r14],#4			@ *K256++
923	add	r6,r6,r2			@ h+=X[i]
924	str	r2,[sp,#13*4]
925	eor	r2,r4,r5
926	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
927	and	r2,r2,r11
928	add	r6,r6,r3			@ h+=K256[i]
929	eor	r2,r2,r5			@ Ch(e,f,g)
930	eor	r0,r7,r7,ror#11
931	add	r6,r6,r2			@ h+=Ch(e,f,g)
932#if 13==31
933	and	r3,r3,#0xff
934	cmp	r3,#0xf2			@ done?
935#endif
936#if 13<15
937# if __ARM_ARCH__>=7
938	ldr	r2,[r1],#4			@ prefetch
939# else
940	ldrb	r2,[r1,#3]
941# endif
942	eor	r3,r7,r8			@ a^b, b^c in next round
943#else
944	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
945	eor	r3,r7,r8			@ a^b, b^c in next round
946	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
947#endif
948	eor	r0,r0,r7,ror#20	@ Sigma0(a)
949	and	r12,r12,r3			@ (b^c)&=(a^b)
950	add	r10,r10,r6			@ d+=h
951	eor	r12,r12,r8			@ Maj(a,b,c)
952	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
953	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
954#if __ARM_ARCH__>=7
955	@ ldr	r2,[r1],#4			@ 14
956# if 14==15
957	str	r1,[sp,#17*4]			@ make room for r1
958# endif
959	eor	r0,r10,r10,ror#5
960	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
961	eor	r0,r0,r10,ror#19	@ Sigma1(e)
962# ifndef __ARMEB__
963	rev	r2,r2
964# endif
965#else
966	@ ldrb	r2,[r1,#3]			@ 14
967	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
968	ldrb	r12,[r1,#2]
969	ldrb	r0,[r1,#1]
970	orr	r2,r2,r12,lsl#8
971	ldrb	r12,[r1],#4
972	orr	r2,r2,r0,lsl#16
973# if 14==15
974	str	r1,[sp,#17*4]			@ make room for r1
975# endif
976	eor	r0,r10,r10,ror#5
977	orr	r2,r2,r12,lsl#24
978	eor	r0,r0,r10,ror#19	@ Sigma1(e)
979#endif
980	ldr	r12,[r14],#4			@ *K256++
981	add	r5,r5,r2			@ h+=X[i]
982	str	r2,[sp,#14*4]
983	eor	r2,r11,r4
984	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
985	and	r2,r2,r10
986	add	r5,r5,r12			@ h+=K256[i]
987	eor	r2,r2,r4			@ Ch(e,f,g)
988	eor	r0,r6,r6,ror#11
989	add	r5,r5,r2			@ h+=Ch(e,f,g)
990#if 14==31
991	and	r12,r12,#0xff
992	cmp	r12,#0xf2			@ done?
993#endif
994#if 14<15
995# if __ARM_ARCH__>=7
996	ldr	r2,[r1],#4			@ prefetch
997# else
998	ldrb	r2,[r1,#3]
999# endif
1000	eor	r12,r6,r7			@ a^b, b^c in next round
1001#else
1002	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1003	eor	r12,r6,r7			@ a^b, b^c in next round
1004	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1005#endif
1006	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1007	and	r3,r3,r12			@ (b^c)&=(a^b)
1008	add	r9,r9,r5			@ d+=h
1009	eor	r3,r3,r7			@ Maj(a,b,c)
1010	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1011	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1012#if __ARM_ARCH__>=7
1013	@ ldr	r2,[r1],#4			@ 15
1014# if 15==15
1015	str	r1,[sp,#17*4]			@ make room for r1
1016# endif
1017	eor	r0,r9,r9,ror#5
1018	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1019	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1020# ifndef __ARMEB__
1021	rev	r2,r2
1022# endif
1023#else
1024	@ ldrb	r2,[r1,#3]			@ 15
1025	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1026	ldrb	r3,[r1,#2]
1027	ldrb	r0,[r1,#1]
1028	orr	r2,r2,r3,lsl#8
1029	ldrb	r3,[r1],#4
1030	orr	r2,r2,r0,lsl#16
1031# if 15==15
1032	str	r1,[sp,#17*4]			@ make room for r1
1033# endif
1034	eor	r0,r9,r9,ror#5
1035	orr	r2,r2,r3,lsl#24
1036	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1037#endif
1038	ldr	r3,[r14],#4			@ *K256++
1039	add	r4,r4,r2			@ h+=X[i]
1040	str	r2,[sp,#15*4]
1041	eor	r2,r10,r11
1042	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1043	and	r2,r2,r9
1044	add	r4,r4,r3			@ h+=K256[i]
1045	eor	r2,r2,r11			@ Ch(e,f,g)
1046	eor	r0,r5,r5,ror#11
1047	add	r4,r4,r2			@ h+=Ch(e,f,g)
1048#if 15==31
1049	and	r3,r3,#0xff
1050	cmp	r3,#0xf2			@ done?
1051#endif
1052#if 15<15
1053# if __ARM_ARCH__>=7
1054	ldr	r2,[r1],#4			@ prefetch
1055# else
1056	ldrb	r2,[r1,#3]
1057# endif
1058	eor	r3,r5,r6			@ a^b, b^c in next round
1059#else
1060	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1061	eor	r3,r5,r6			@ a^b, b^c in next round
1062	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1063#endif
1064	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1065	and	r12,r12,r3			@ (b^c)&=(a^b)
1066	add	r8,r8,r4			@ d+=h
1067	eor	r12,r12,r6			@ Maj(a,b,c)
1068	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1069	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1070.Lrounds_16_xx:
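@ Rounds 16-63: the message schedule is expanded in place in the
@ 16-word X[] ring on the stack, per FIPS 180-4:
@   X[i&15] += sigma1(X[(i-2)&15]) + X[(i-7)&15] + sigma0(X[(i-15)&15])
@ where sigma0(x) = ROR(x,7)^ROR(x,18)^(x>>3)
@ and   sigma1(x) = ROR(x,17)^ROR(x,19)^(x>>10).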
1071	@ ldr	r2,[sp,#1*4]		@ 16
1072	@ ldr	r1,[sp,#14*4]
1073	mov	r0,r2,ror#7
1074	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1075	mov	r12,r1,ror#17
1076	eor	r0,r0,r2,ror#18
1077	eor	r12,r12,r1,ror#19
1078	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1079	ldr	r2,[sp,#0*4]
1080	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1081	ldr	r1,[sp,#9*4]
1082
1083	add	r12,r12,r0
1084	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1085	add	r2,r2,r12
1086	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1087	add	r2,r2,r1			@ X[i]
1088	ldr	r12,[r14],#4			@ *K256++
1089	add	r11,r11,r2			@ h+=X[i]
1090	str	r2,[sp,#0*4]
1091	eor	r2,r9,r10
1092	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1093	and	r2,r2,r8
1094	add	r11,r11,r12			@ h+=K256[i]
1095	eor	r2,r2,r10			@ Ch(e,f,g)
1096	eor	r0,r4,r4,ror#11
1097	add	r11,r11,r2			@ h+=Ch(e,f,g)
1098#if 16==31
1099	and	r12,r12,#0xff
1100	cmp	r12,#0xf2			@ done?
1101#endif
1102#if 16<15
1103# if __ARM_ARCH__>=7
1104	ldr	r2,[r1],#4			@ prefetch
1105# else
1106	ldrb	r2,[r1,#3]
1107# endif
1108	eor	r12,r4,r5			@ a^b, b^c in next round
1109#else
1110	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1111	eor	r12,r4,r5			@ a^b, b^c in next round
1112	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1113#endif
1114	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1115	and	r3,r3,r12			@ (b^c)&=(a^b)
1116	add	r7,r7,r11			@ d+=h
1117	eor	r3,r3,r5			@ Maj(a,b,c)
1118	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1119	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1120	@ ldr	r2,[sp,#2*4]		@ 17
1121	@ ldr	r1,[sp,#15*4]
1122	mov	r0,r2,ror#7
1123	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1124	mov	r3,r1,ror#17
1125	eor	r0,r0,r2,ror#18
1126	eor	r3,r3,r1,ror#19
1127	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1128	ldr	r2,[sp,#1*4]
1129	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1130	ldr	r1,[sp,#10*4]
1131
1132	add	r3,r3,r0
1133	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1134	add	r2,r2,r3
1135	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1136	add	r2,r2,r1			@ X[i]
1137	ldr	r3,[r14],#4			@ *K256++
1138	add	r10,r10,r2			@ h+=X[i]
1139	str	r2,[sp,#1*4]
1140	eor	r2,r8,r9
1141	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1142	and	r2,r2,r7
1143	add	r10,r10,r3			@ h+=K256[i]
1144	eor	r2,r2,r9			@ Ch(e,f,g)
1145	eor	r0,r11,r11,ror#11
1146	add	r10,r10,r2			@ h+=Ch(e,f,g)
1147#if 17==31
1148	and	r3,r3,#0xff
1149	cmp	r3,#0xf2			@ done?
1150#endif
1151#if 17<15
1152# if __ARM_ARCH__>=7
1153	ldr	r2,[r1],#4			@ prefetch
1154# else
1155	ldrb	r2,[r1,#3]
1156# endif
1157	eor	r3,r11,r4			@ a^b, b^c in next round
1158#else
1159	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1160	eor	r3,r11,r4			@ a^b, b^c in next round
1161	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1162#endif
1163	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1164	and	r12,r12,r3			@ (b^c)&=(a^b)
1165	add	r6,r6,r10			@ d+=h
1166	eor	r12,r12,r4			@ Maj(a,b,c)
1167	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1168	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1169	@ ldr	r2,[sp,#3*4]		@ 18
1170	@ ldr	r1,[sp,#0*4]
1171	mov	r0,r2,ror#7
1172	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1173	mov	r12,r1,ror#17
1174	eor	r0,r0,r2,ror#18
1175	eor	r12,r12,r1,ror#19
1176	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1177	ldr	r2,[sp,#2*4]
1178	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1179	ldr	r1,[sp,#11*4]
1180
1181	add	r12,r12,r0
1182	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1183	add	r2,r2,r12
1184	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1185	add	r2,r2,r1			@ X[i]
1186	ldr	r12,[r14],#4			@ *K256++
1187	add	r9,r9,r2			@ h+=X[i]
1188	str	r2,[sp,#2*4]
1189	eor	r2,r7,r8
1190	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1191	and	r2,r2,r6
1192	add	r9,r9,r12			@ h+=K256[i]
1193	eor	r2,r2,r8			@ Ch(e,f,g)
1194	eor	r0,r10,r10,ror#11
1195	add	r9,r9,r2			@ h+=Ch(e,f,g)
1196#if 18==31
1197	and	r12,r12,#0xff
1198	cmp	r12,#0xf2			@ done?
1199#endif
1200#if 18<15
1201# if __ARM_ARCH__>=7
1202	ldr	r2,[r1],#4			@ prefetch
1203# else
1204	ldrb	r2,[r1,#3]
1205# endif
1206	eor	r12,r10,r11			@ a^b, b^c in next round
1207#else
1208	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1209	eor	r12,r10,r11			@ a^b, b^c in next round
1210	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1211#endif
1212	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1213	and	r3,r3,r12			@ (b^c)&=(a^b)
1214	add	r5,r5,r9			@ d+=h
1215	eor	r3,r3,r11			@ Maj(a,b,c)
1216	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1217	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1218	@ ldr	r2,[sp,#4*4]		@ 19
1219	@ ldr	r1,[sp,#1*4]
1220	mov	r0,r2,ror#7
1221	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1222	mov	r3,r1,ror#17
1223	eor	r0,r0,r2,ror#18
1224	eor	r3,r3,r1,ror#19
1225	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1226	ldr	r2,[sp,#3*4]
1227	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1228	ldr	r1,[sp,#12*4]
1229
1230	add	r3,r3,r0
1231	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1232	add	r2,r2,r3
1233	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1234	add	r2,r2,r1			@ X[i]
1235	ldr	r3,[r14],#4			@ *K256++
1236	add	r8,r8,r2			@ h+=X[i]
1237	str	r2,[sp,#3*4]
1238	eor	r2,r6,r7
1239	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1240	and	r2,r2,r5
1241	add	r8,r8,r3			@ h+=K256[i]
1242	eor	r2,r2,r7			@ Ch(e,f,g)
1243	eor	r0,r9,r9,ror#11
1244	add	r8,r8,r2			@ h+=Ch(e,f,g)
1245#if 19==31
1246	and	r3,r3,#0xff
1247	cmp	r3,#0xf2			@ done?
1248#endif
1249#if 19<15
1250# if __ARM_ARCH__>=7
1251	ldr	r2,[r1],#4			@ prefetch
1252# else
1253	ldrb	r2,[r1,#3]
1254# endif
1255	eor	r3,r9,r10			@ a^b, b^c in next round
1256#else
1257	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1258	eor	r3,r9,r10			@ a^b, b^c in next round
1259	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1260#endif
1261	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1262	and	r12,r12,r3			@ (b^c)&=(a^b)
1263	add	r4,r4,r8			@ d+=h
1264	eor	r12,r12,r10			@ Maj(a,b,c)
1265	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1266	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1267	@ ldr	r2,[sp,#5*4]		@ 20
1268	@ ldr	r1,[sp,#2*4]
1269	mov	r0,r2,ror#7
1270	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1271	mov	r12,r1,ror#17
1272	eor	r0,r0,r2,ror#18
1273	eor	r12,r12,r1,ror#19
1274	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1275	ldr	r2,[sp,#4*4]
1276	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1277	ldr	r1,[sp,#13*4]
1278
1279	add	r12,r12,r0
1280	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1281	add	r2,r2,r12
1282	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1283	add	r2,r2,r1			@ X[i]
1284	ldr	r12,[r14],#4			@ *K256++
1285	add	r7,r7,r2			@ h+=X[i]
1286	str	r2,[sp,#4*4]
1287	eor	r2,r5,r6
1288	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1289	and	r2,r2,r4
1290	add	r7,r7,r12			@ h+=K256[i]
1291	eor	r2,r2,r6			@ Ch(e,f,g)
1292	eor	r0,r8,r8,ror#11
1293	add	r7,r7,r2			@ h+=Ch(e,f,g)
1294#if 20==31
1295	and	r12,r12,#0xff
1296	cmp	r12,#0xf2			@ done?
1297#endif
1298#if 20<15
1299# if __ARM_ARCH__>=7
1300	ldr	r2,[r1],#4			@ prefetch
1301# else
1302	ldrb	r2,[r1,#3]
1303# endif
1304	eor	r12,r8,r9			@ a^b, b^c in next round
1305#else
1306	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1307	eor	r12,r8,r9			@ a^b, b^c in next round
1308	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1309#endif
1310	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1311	and	r3,r3,r12			@ (b^c)&=(a^b)
1312	add	r11,r11,r7			@ d+=h
1313	eor	r3,r3,r9			@ Maj(a,b,c)
1314	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1315	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1316	@ ldr	r2,[sp,#6*4]		@ 21
1317	@ ldr	r1,[sp,#3*4]
1318	mov	r0,r2,ror#7
1319	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1320	mov	r3,r1,ror#17
1321	eor	r0,r0,r2,ror#18
1322	eor	r3,r3,r1,ror#19
1323	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1324	ldr	r2,[sp,#5*4]
1325	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1326	ldr	r1,[sp,#14*4]
1327
1328	add	r3,r3,r0
1329	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1330	add	r2,r2,r3
1331	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1332	add	r2,r2,r1			@ X[i]
1333	ldr	r3,[r14],#4			@ *K256++
1334	add	r6,r6,r2			@ h+=X[i]
1335	str	r2,[sp,#5*4]
1336	eor	r2,r4,r5
1337	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1338	and	r2,r2,r11
1339	add	r6,r6,r3			@ h+=K256[i]
1340	eor	r2,r2,r5			@ Ch(e,f,g)
1341	eor	r0,r7,r7,ror#11
1342	add	r6,r6,r2			@ h+=Ch(e,f,g)
1343#if 21==31
1344	and	r3,r3,#0xff
1345	cmp	r3,#0xf2			@ done?
1346#endif
1347#if 21<15
1348# if __ARM_ARCH__>=7
1349	ldr	r2,[r1],#4			@ prefetch
1350# else
1351	ldrb	r2,[r1,#3]
1352# endif
1353	eor	r3,r7,r8			@ a^b, b^c in next round
1354#else
1355	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1356	eor	r3,r7,r8			@ a^b, b^c in next round
1357	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1358#endif
1359	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1360	and	r12,r12,r3			@ (b^c)&=(a^b)
1361	add	r10,r10,r6			@ d+=h
1362	eor	r12,r12,r8			@ Maj(a,b,c)
1363	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1364	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1365	@ ldr	r2,[sp,#7*4]		@ 22
1366	@ ldr	r1,[sp,#4*4]
1367	mov	r0,r2,ror#7
1368	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1369	mov	r12,r1,ror#17
1370	eor	r0,r0,r2,ror#18
1371	eor	r12,r12,r1,ror#19
1372	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1373	ldr	r2,[sp,#6*4]
1374	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1375	ldr	r1,[sp,#15*4]
1376
1377	add	r12,r12,r0
1378	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1379	add	r2,r2,r12
1380	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1381	add	r2,r2,r1			@ X[i]
1382	ldr	r12,[r14],#4			@ *K256++
1383	add	r5,r5,r2			@ h+=X[i]
1384	str	r2,[sp,#6*4]
1385	eor	r2,r11,r4
1386	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1387	and	r2,r2,r10
1388	add	r5,r5,r12			@ h+=K256[i]
1389	eor	r2,r2,r4			@ Ch(e,f,g)
1390	eor	r0,r6,r6,ror#11
1391	add	r5,r5,r2			@ h+=Ch(e,f,g)
1392#if 22==31
1393	and	r12,r12,#0xff
1394	cmp	r12,#0xf2			@ done?
1395#endif
1396#if 22<15
1397# if __ARM_ARCH__>=7
1398	ldr	r2,[r1],#4			@ prefetch
1399# else
1400	ldrb	r2,[r1,#3]
1401# endif
1402	eor	r12,r6,r7			@ a^b, b^c in next round
1403#else
1404	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1405	eor	r12,r6,r7			@ a^b, b^c in next round
1406	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1407#endif
1408	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1409	and	r3,r3,r12			@ (b^c)&=(a^b)
1410	add	r9,r9,r5			@ d+=h
1411	eor	r3,r3,r7			@ Maj(a,b,c)
1412	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1413	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1414	@ ldr	r2,[sp,#8*4]		@ 23
1415	@ ldr	r1,[sp,#5*4]
1416	mov	r0,r2,ror#7
1417	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1418	mov	r3,r1,ror#17
1419	eor	r0,r0,r2,ror#18
1420	eor	r3,r3,r1,ror#19
1421	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1422	ldr	r2,[sp,#7*4]
1423	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1424	ldr	r1,[sp,#0*4]
1425
1426	add	r3,r3,r0
1427	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1428	add	r2,r2,r3
1429	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1430	add	r2,r2,r1			@ X[i]
1431	ldr	r3,[r14],#4			@ *K256++
1432	add	r4,r4,r2			@ h+=X[i]
1433	str	r2,[sp,#7*4]
1434	eor	r2,r10,r11
1435	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1436	and	r2,r2,r9
1437	add	r4,r4,r3			@ h+=K256[i]
1438	eor	r2,r2,r11			@ Ch(e,f,g)
1439	eor	r0,r5,r5,ror#11
1440	add	r4,r4,r2			@ h+=Ch(e,f,g)
1441#if 23==31
1442	and	r3,r3,#0xff
1443	cmp	r3,#0xf2			@ done?
1444#endif
1445#if 23<15
1446# if __ARM_ARCH__>=7
1447	ldr	r2,[r1],#4			@ prefetch
1448# else
1449	ldrb	r2,[r1,#3]
1450# endif
1451	eor	r3,r5,r6			@ a^b, b^c in next round
1452#else
1453	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1454	eor	r3,r5,r6			@ a^b, b^c in next round
1455	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1456#endif
1457	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1458	and	r12,r12,r3			@ (b^c)&=(a^b)
1459	add	r8,r8,r4			@ d+=h
1460	eor	r12,r12,r6			@ Maj(a,b,c)
1461	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1462	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1463	@ ldr	r2,[sp,#9*4]		@ 24
1464	@ ldr	r1,[sp,#6*4]
1465	mov	r0,r2,ror#7
1466	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1467	mov	r12,r1,ror#17
1468	eor	r0,r0,r2,ror#18
1469	eor	r12,r12,r1,ror#19
1470	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1471	ldr	r2,[sp,#8*4]
1472	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1473	ldr	r1,[sp,#1*4]
1474
1475	add	r12,r12,r0
1476	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1477	add	r2,r2,r12
1478	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1479	add	r2,r2,r1			@ X[i]
1480	ldr	r12,[r14],#4			@ *K256++
1481	add	r11,r11,r2			@ h+=X[i]
1482	str	r2,[sp,#8*4]
1483	eor	r2,r9,r10
1484	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1485	and	r2,r2,r8
1486	add	r11,r11,r12			@ h+=K256[i]
1487	eor	r2,r2,r10			@ Ch(e,f,g)
1488	eor	r0,r4,r4,ror#11
1489	add	r11,r11,r2			@ h+=Ch(e,f,g)
1490#if 24==31
1491	and	r12,r12,#0xff
1492	cmp	r12,#0xf2			@ done?
1493#endif
1494#if 24<15
1495# if __ARM_ARCH__>=7
1496	ldr	r2,[r1],#4			@ prefetch
1497# else
1498	ldrb	r2,[r1,#3]
1499# endif
1500	eor	r12,r4,r5			@ a^b, b^c in next round
1501#else
1502	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1503	eor	r12,r4,r5			@ a^b, b^c in next round
1504	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1505#endif
1506	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1507	and	r3,r3,r12			@ (b^c)&=(a^b)
1508	add	r7,r7,r11			@ d+=h
1509	eor	r3,r3,r5			@ Maj(a,b,c)
1510	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1511	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1512	@ ldr	r2,[sp,#10*4]		@ 25
1513	@ ldr	r1,[sp,#7*4]
1514	mov	r0,r2,ror#7
1515	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1516	mov	r3,r1,ror#17
1517	eor	r0,r0,r2,ror#18
1518	eor	r3,r3,r1,ror#19
1519	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1520	ldr	r2,[sp,#9*4]
1521	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1522	ldr	r1,[sp,#2*4]
1523
1524	add	r3,r3,r0
1525	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1526	add	r2,r2,r3
1527	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1528	add	r2,r2,r1			@ X[i]
1529	ldr	r3,[r14],#4			@ *K256++
1530	add	r10,r10,r2			@ h+=X[i]
1531	str	r2,[sp,#9*4]
1532	eor	r2,r8,r9
1533	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1534	and	r2,r2,r7
1535	add	r10,r10,r3			@ h+=K256[i]
1536	eor	r2,r2,r9			@ Ch(e,f,g)
1537	eor	r0,r11,r11,ror#11
1538	add	r10,r10,r2			@ h+=Ch(e,f,g)
1539#if 25==31
1540	and	r3,r3,#0xff
1541	cmp	r3,#0xf2			@ done?
1542#endif
1543#if 25<15
1544# if __ARM_ARCH__>=7
1545	ldr	r2,[r1],#4			@ prefetch
1546# else
1547	ldrb	r2,[r1,#3]
1548# endif
1549	eor	r3,r11,r4			@ a^b, b^c in next round
1550#else
1551	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1552	eor	r3,r11,r4			@ a^b, b^c in next round
1553	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1554#endif
1555	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1556	and	r12,r12,r3			@ (b^c)&=(a^b)
1557	add	r6,r6,r10			@ d+=h
1558	eor	r12,r12,r4			@ Maj(a,b,c)
1559	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1560	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1561	@ ldr	r2,[sp,#11*4]		@ 26
1562	@ ldr	r1,[sp,#8*4]
1563	mov	r0,r2,ror#7
1564	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1565	mov	r12,r1,ror#17
1566	eor	r0,r0,r2,ror#18
1567	eor	r12,r12,r1,ror#19
1568	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1569	ldr	r2,[sp,#10*4]
1570	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1571	ldr	r1,[sp,#3*4]
1572
1573	add	r12,r12,r0
1574	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1575	add	r2,r2,r12
1576	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1577	add	r2,r2,r1			@ X[i]
1578	ldr	r12,[r14],#4			@ *K256++
1579	add	r9,r9,r2			@ h+=X[i]
1580	str	r2,[sp,#10*4]
1581	eor	r2,r7,r8
1582	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1583	and	r2,r2,r6
1584	add	r9,r9,r12			@ h+=K256[i]
1585	eor	r2,r2,r8			@ Ch(e,f,g)
1586	eor	r0,r10,r10,ror#11
1587	add	r9,r9,r2			@ h+=Ch(e,f,g)
1588#if 26==31
1589	and	r12,r12,#0xff
1590	cmp	r12,#0xf2			@ done?
1591#endif
1592#if 26<15
1593# if __ARM_ARCH__>=7
1594	ldr	r2,[r1],#4			@ prefetch
1595# else
1596	ldrb	r2,[r1,#3]
1597# endif
1598	eor	r12,r10,r11			@ a^b, b^c in next round
1599#else
1600	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1601	eor	r12,r10,r11			@ a^b, b^c in next round
1602	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1603#endif
1604	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1605	and	r3,r3,r12			@ (b^c)&=(a^b)
1606	add	r5,r5,r9			@ d+=h
1607	eor	r3,r3,r11			@ Maj(a,b,c)
1608	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1609	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1610	@ ldr	r2,[sp,#12*4]		@ 27
1611	@ ldr	r1,[sp,#9*4]
1612	mov	r0,r2,ror#7
1613	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1614	mov	r3,r1,ror#17
1615	eor	r0,r0,r2,ror#18
1616	eor	r3,r3,r1,ror#19
1617	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1618	ldr	r2,[sp,#11*4]
1619	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1620	ldr	r1,[sp,#4*4]
1621
1622	add	r3,r3,r0
1623	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1624	add	r2,r2,r3
1625	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1626	add	r2,r2,r1			@ X[i]
1627	ldr	r3,[r14],#4			@ *K256++
1628	add	r8,r8,r2			@ h+=X[i]
1629	str	r2,[sp,#11*4]
1630	eor	r2,r6,r7
1631	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1632	and	r2,r2,r5
1633	add	r8,r8,r3			@ h+=K256[i]
1634	eor	r2,r2,r7			@ Ch(e,f,g)
1635	eor	r0,r9,r9,ror#11
1636	add	r8,r8,r2			@ h+=Ch(e,f,g)
1637#if 27==31
1638	and	r3,r3,#0xff
1639	cmp	r3,#0xf2			@ done?
1640#endif
1641#if 27<15
1642# if __ARM_ARCH__>=7
1643	ldr	r2,[r1],#4			@ prefetch
1644# else
1645	ldrb	r2,[r1,#3]
1646# endif
1647	eor	r3,r9,r10			@ a^b, b^c in next round
1648#else
1649	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1650	eor	r3,r9,r10			@ a^b, b^c in next round
1651	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1652#endif
1653	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1654	and	r12,r12,r3			@ (b^c)&=(a^b)
1655	add	r4,r4,r8			@ d+=h
1656	eor	r12,r12,r10			@ Maj(a,b,c)
1657	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1658	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1659	@ ldr	r2,[sp,#13*4]		@ 28
1660	@ ldr	r1,[sp,#10*4]
1661	mov	r0,r2,ror#7
1662	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1663	mov	r12,r1,ror#17
1664	eor	r0,r0,r2,ror#18
1665	eor	r12,r12,r1,ror#19
1666	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1667	ldr	r2,[sp,#12*4]
1668	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1669	ldr	r1,[sp,#5*4]
1670
1671	add	r12,r12,r0
1672	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1673	add	r2,r2,r12
1674	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1675	add	r2,r2,r1			@ X[i]
1676	ldr	r12,[r14],#4			@ *K256++
1677	add	r7,r7,r2			@ h+=X[i]
1678	str	r2,[sp,#12*4]
1679	eor	r2,r5,r6
1680	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1681	and	r2,r2,r4
1682	add	r7,r7,r12			@ h+=K256[i]
1683	eor	r2,r2,r6			@ Ch(e,f,g)
1684	eor	r0,r8,r8,ror#11
1685	add	r7,r7,r2			@ h+=Ch(e,f,g)
1686#if 28==31
1687	and	r12,r12,#0xff
1688	cmp	r12,#0xf2			@ done?
1689#endif
1690#if 28<15
1691# if __ARM_ARCH__>=7
1692	ldr	r2,[r1],#4			@ prefetch
1693# else
1694	ldrb	r2,[r1,#3]
1695# endif
1696	eor	r12,r8,r9			@ a^b, b^c in next round
1697#else
1698	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1699	eor	r12,r8,r9			@ a^b, b^c in next round
1700	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1701#endif
1702	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1703	and	r3,r3,r12			@ (b^c)&=(a^b)
1704	add	r11,r11,r7			@ d+=h
1705	eor	r3,r3,r9			@ Maj(a,b,c)
1706	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1707	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1708	@ ldr	r2,[sp,#14*4]		@ 29
1709	@ ldr	r1,[sp,#11*4]
1710	mov	r0,r2,ror#7
1711	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1712	mov	r3,r1,ror#17
1713	eor	r0,r0,r2,ror#18
1714	eor	r3,r3,r1,ror#19
1715	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1716	ldr	r2,[sp,#13*4]
1717	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1718	ldr	r1,[sp,#6*4]
1719
1720	add	r3,r3,r0
1721	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1722	add	r2,r2,r3
1723	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1724	add	r2,r2,r1			@ X[i]
1725	ldr	r3,[r14],#4			@ *K256++
1726	add	r6,r6,r2			@ h+=X[i]
1727	str	r2,[sp,#13*4]
1728	eor	r2,r4,r5
1729	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1730	and	r2,r2,r11
1731	add	r6,r6,r3			@ h+=K256[i]
1732	eor	r2,r2,r5			@ Ch(e,f,g)
1733	eor	r0,r7,r7,ror#11
1734	add	r6,r6,r2			@ h+=Ch(e,f,g)
1735#if 29==31
1736	and	r3,r3,#0xff
1737	cmp	r3,#0xf2			@ done?
1738#endif
1739#if 29<15
1740# if __ARM_ARCH__>=7
1741	ldr	r2,[r1],#4			@ prefetch
1742# else
1743	ldrb	r2,[r1,#3]
1744# endif
1745	eor	r3,r7,r8			@ a^b, b^c in next round
1746#else
1747	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1748	eor	r3,r7,r8			@ a^b, b^c in next round
1749	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1750#endif
1751	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1752	and	r12,r12,r3			@ (b^c)&=(a^b)
1753	add	r10,r10,r6			@ d+=h
1754	eor	r12,r12,r8			@ Maj(a,b,c)
1755	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1756	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1757	@ ldr	r2,[sp,#15*4]		@ 30
1758	@ ldr	r1,[sp,#12*4]
1759	mov	r0,r2,ror#7
1760	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1761	mov	r12,r1,ror#17
1762	eor	r0,r0,r2,ror#18
1763	eor	r12,r12,r1,ror#19
1764	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1765	ldr	r2,[sp,#14*4]
1766	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1767	ldr	r1,[sp,#7*4]
1768
1769	add	r12,r12,r0
1770	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1771	add	r2,r2,r12
1772	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1773	add	r2,r2,r1			@ X[i]
1774	ldr	r12,[r14],#4			@ *K256++
1775	add	r5,r5,r2			@ h+=X[i]
1776	str	r2,[sp,#14*4]
1777	eor	r2,r11,r4
1778	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1779	and	r2,r2,r10
1780	add	r5,r5,r12			@ h+=K256[i]
1781	eor	r2,r2,r4			@ Ch(e,f,g)
1782	eor	r0,r6,r6,ror#11
1783	add	r5,r5,r2			@ h+=Ch(e,f,g)
1784#if 30==31
1785	and	r12,r12,#0xff
1786	cmp	r12,#0xf2			@ done?
1787#endif
1788#if 30<15
1789# if __ARM_ARCH__>=7
1790	ldr	r2,[r1],#4			@ prefetch
1791# else
1792	ldrb	r2,[r1,#3]
1793# endif
1794	eor	r12,r6,r7			@ a^b, b^c in next round
1795#else
1796	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1797	eor	r12,r6,r7			@ a^b, b^c in next round
1798	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1799#endif
1800	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1801	and	r3,r3,r12			@ (b^c)&=(a^b)
1802	add	r9,r9,r5			@ d+=h
1803	eor	r3,r3,r7			@ Maj(a,b,c)
1804	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1805	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1806	@ ldr	r2,[sp,#0*4]		@ 31
1807	@ ldr	r1,[sp,#13*4]
1808	mov	r0,r2,ror#7
1809	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1810	mov	r3,r1,ror#17
1811	eor	r0,r0,r2,ror#18
1812	eor	r3,r3,r1,ror#19
1813	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1814	ldr	r2,[sp,#15*4]
1815	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1816	ldr	r1,[sp,#8*4]
1817
1818	add	r3,r3,r0
1819	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1820	add	r2,r2,r3
1821	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1822	add	r2,r2,r1			@ X[i]
1823	ldr	r3,[r14],#4			@ *K256++
1824	add	r4,r4,r2			@ h+=X[i]
1825	str	r2,[sp,#15*4]
1826	eor	r2,r10,r11
1827	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1828	and	r2,r2,r9
1829	add	r4,r4,r3			@ h+=K256[i]
1830	eor	r2,r2,r11			@ Ch(e,f,g)
1831	eor	r0,r5,r5,ror#11
1832	add	r4,r4,r2			@ h+=Ch(e,f,g)
1833#if 31==31
1834	and	r3,r3,#0xff
1835	cmp	r3,#0xf2			@ done?
1836#endif
1837#if 31<15
1838# if __ARM_ARCH__>=7
1839	ldr	r2,[r1],#4			@ prefetch
1840# else
1841	ldrb	r2,[r1,#3]
1842# endif
1843	eor	r3,r5,r6			@ a^b, b^c in next round
1844#else
1845	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1846	eor	r3,r5,r6			@ a^b, b^c in next round
1847	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1848#endif
1849	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1850	and	r12,r12,r3			@ (b^c)&=(a^b)
1851	add	r8,r8,r4			@ d+=h
1852	eor	r12,r12,r6			@ Maj(a,b,c)
1853	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1854	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
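@ Loop control: the and/cmp in the last round above keeps only the low
@ byte of the K256 word just loaded and compares it against 0xf2, the
@ low byte of the final constant 0xc67178f2, so eq means all 64 rounds
@ of this block are done.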
1855#if __ARM_ARCH__>=7
1856	ite	eq			@ Thumb2 thing, sanity check in ARM
1857#endif
1858	ldreq	r3,[sp,#16*4]		@ pull ctx
1859	bne	.Lrounds_16_xx
1860
1861	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1862	ldr	r0,[r3,#0]
1863	ldr	r2,[r3,#4]
1864	ldr	r12,[r3,#8]
1865	add	r4,r4,r0
1866	ldr	r0,[r3,#12]
1867	add	r5,r5,r2
1868	ldr	r2,[r3,#16]
1869	add	r6,r6,r12
1870	ldr	r12,[r3,#20]
1871	add	r7,r7,r0
1872	ldr	r0,[r3,#24]
1873	add	r8,r8,r2
1874	ldr	r2,[r3,#28]
1875	add	r9,r9,r12
1876	ldr	r1,[sp,#17*4]		@ pull inp
1877	ldr	r12,[sp,#18*4]		@ pull inp+len
1878	add	r10,r10,r0
1879	add	r11,r11,r2
1880	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1881	cmp	r1,r12
1882	sub	r14,r14,#256	@ rewind Ktbl
1883	bne	.Loop
1884
1885	add	sp,sp,#19*4	@ destroy frame
1886#if __ARM_ARCH__>=5
1887	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1888#else
1889	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1890	tst	lr,#1
1891	moveq	pc,lr			@ be binary compatible with V4, yet
1892.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1893#endif
1894.size	sha256_block_data_order,.-sha256_block_data_order
1895#if __ARM_MAX_ARCH__>=7
1896.arch	armv7-a
1897.fpu	neon
1898
1899.globl	sha256_block_data_order_neon
1900.hidden	sha256_block_data_order_neon
1901.type	sha256_block_data_order_neon,%function
1902.align	5
1903.skip	16
1904sha256_block_data_order_neon:
1905.LNEON:
1906	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1907
1908	sub	r11,sp,#16*4+16
1909	adr	r14,K256
1910	bic	r11,r11,#15		@ align for 128-bit stores
1911	mov	r12,sp
1912	mov	sp,r11			@ alloca
1913	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1914
1915	vld1.8	{q0},[r1]!
1916	vld1.8	{q1},[r1]!
1917	vld1.8	{q2},[r1]!
1918	vld1.8	{q3},[r1]!
1919	vld1.32	{q8},[r14,:128]!
1920	vld1.32	{q9},[r14,:128]!
1921	vld1.32	{q10},[r14,:128]!
1922	vld1.32	{q11},[r14,:128]!
1923	vrev32.8	q0,q0		@ yes, even on
1924	str	r0,[sp,#64]
1925	vrev32.8	q1,q1		@ big-endian
1926	str	r1,[sp,#68]
1927	mov	r1,sp
1928	vrev32.8	q2,q2
1929	str	r2,[sp,#72]
1930	vrev32.8	q3,q3
1931	str	r12,[sp,#76]		@ save original sp
1932	vadd.i32	q8,q8,q0
1933	vadd.i32	q9,q9,q1
1934	vst1.32	{q8},[r1,:128]!
1935	vadd.i32	q10,q10,q2
1936	vst1.32	{q9},[r1,:128]!
1937	vadd.i32	q11,q11,q3
1938	vst1.32	{q10},[r1,:128]!
1939	vst1.32	{q11},[r1,:128]!
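@ X[i]+K256[i] for the first 16 rounds has been precomputed into the
@ 16-byte-aligned stack buffer above, so the interleaved scalar rounds
@ below pick up ready-made words while NEON expands the schedule.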
1940
1941	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1942	sub	r1,r1,#64
1943	ldr	r2,[sp,#0]
1944	eor	r12,r12,r12
1945	eor	r3,r5,r6
1946	b	.L_00_48
1947
1948.align	4
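@ NEON has no rotate instruction; ROR(x,n) is synthesized as
@ vshr.u32 #n followed by vsli.32 #(32-n) into the same register.
@ .L_00_48 interleaves four-lane schedule expansion with the scalar
@ round function for the first 48 rounds.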
1949.L_00_48:
1950	vext.8	q8,q0,q1,#4
1951	add	r11,r11,r2
1952	eor	r2,r9,r10
1953	eor	r0,r8,r8,ror#5
1954	vext.8	q9,q2,q3,#4
1955	add	r4,r4,r12
1956	and	r2,r2,r8
1957	eor	r12,r0,r8,ror#19
1958	vshr.u32	q10,q8,#7
1959	eor	r0,r4,r4,ror#11
1960	eor	r2,r2,r10
1961	vadd.i32	q0,q0,q9
1962	add	r11,r11,r12,ror#6
1963	eor	r12,r4,r5
1964	vshr.u32	q9,q8,#3
1965	eor	r0,r0,r4,ror#20
1966	add	r11,r11,r2
1967	vsli.32	q10,q8,#25
1968	ldr	r2,[sp,#4]
1969	and	r3,r3,r12
1970	vshr.u32	q11,q8,#18
1971	add	r7,r7,r11
1972	add	r11,r11,r0,ror#2
1973	eor	r3,r3,r5
1974	veor	q9,q9,q10
1975	add	r10,r10,r2
1976	vsli.32	q11,q8,#14
1977	eor	r2,r8,r9
1978	eor	r0,r7,r7,ror#5
1979	vshr.u32	d24,d7,#17
1980	add	r11,r11,r3
1981	and	r2,r2,r7
1982	veor	q9,q9,q11
1983	eor	r3,r0,r7,ror#19
1984	eor	r0,r11,r11,ror#11
1985	vsli.32	d24,d7,#15
1986	eor	r2,r2,r9
1987	add	r10,r10,r3,ror#6
1988	vshr.u32	d25,d7,#10
1989	eor	r3,r11,r4
1990	eor	r0,r0,r11,ror#20
1991	vadd.i32	q0,q0,q9
1992	add	r10,r10,r2
1993	ldr	r2,[sp,#8]
1994	veor	d25,d25,d24
1995	and	r12,r12,r3
1996	add	r6,r6,r10
1997	vshr.u32	d24,d7,#19
1998	add	r10,r10,r0,ror#2
1999	eor	r12,r12,r4
2000	vsli.32	d24,d7,#13
2001	add	r9,r9,r2
2002	eor	r2,r7,r8
2003	veor	d25,d25,d24
2004	eor	r0,r6,r6,ror#5
2005	add	r10,r10,r12
2006	vadd.i32	d0,d0,d25
2007	and	r2,r2,r6
2008	eor	r12,r0,r6,ror#19
2009	vshr.u32	d24,d0,#17
2010	eor	r0,r10,r10,ror#11
2011	eor	r2,r2,r8
2012	vsli.32	d24,d0,#15
2013	add	r9,r9,r12,ror#6
2014	eor	r12,r10,r11
2015	vshr.u32	d25,d0,#10
2016	eor	r0,r0,r10,ror#20
2017	add	r9,r9,r2
2018	veor	d25,d25,d24
2019	ldr	r2,[sp,#12]
2020	and	r3,r3,r12
2021	vshr.u32	d24,d0,#19
2022	add	r5,r5,r9
2023	add	r9,r9,r0,ror#2
2024	eor	r3,r3,r11
2025	vld1.32	{q8},[r14,:128]!
2026	add	r8,r8,r2
2027	vsli.32	d24,d0,#13
2028	eor	r2,r6,r7
2029	eor	r0,r5,r5,ror#5
2030	veor	d25,d25,d24
2031	add	r9,r9,r3
2032	and	r2,r2,r5
2033	vadd.i32	d1,d1,d25
2034	eor	r3,r0,r5,ror#19
2035	eor	r0,r9,r9,ror#11
2036	vadd.i32	q8,q8,q0
2037	eor	r2,r2,r7
2038	add	r8,r8,r3,ror#6
2039	eor	r3,r9,r10
2040	eor	r0,r0,r9,ror#20
2041	add	r8,r8,r2
2042	ldr	r2,[sp,#16]
2043	and	r12,r12,r3
2044	add	r4,r4,r8
2045	vst1.32	{q8},[r1,:128]!
2046	add	r8,r8,r0,ror#2
2047	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

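@ Rounds 48-63. Reload the pointers saved on the stack and preload
@ the next 64-byte input block; on the last block the current one is
@ re-read instead, so no memory past the end of the input is touched.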
	ldr	r1,[sp,#68]
	ldr	r0,[sp,#72]
	sub	r14,r14,#256	@ rewind r14
	teq	r1,r0
	it	eq
	subeq	r1,r1,#64		@ avoid SEGV
	vld1.8	{q0},[r1]!		@ load next input block
	vld1.8	{q1},[r1]!
	vld1.8	{q2},[r1]!
	vld1.8	{q3},[r1]!
	it	ne
	strne	r1,[sp,#68]
	mov	r1,sp
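@ The final scalar rounds below are interleaved with byte-swapping
@ the preloaded block (vrev32.8) and precomputing its first W[t]+K[t]
@ values, ready for the next trip through .L_00_48.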
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
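@ r2 holds the context pointer reloaded from [sp,#64]; fold the
@ working variables a-h back into the hash state.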
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8,r9,r10,r11}

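@ The flags still hold the teq r1,r0 input-pointer comparison from
@ above: NE means more blocks remain, so reset the helper registers
@ and loop; EQ restores the caller's sp and falls through to the
@ epilogue.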
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)

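@ INST emits one manually-encoded instruction: in ARM state the four
@ bytes are the little-endian encoding, while in Thumb-2 the two
@ 16-bit halfwords are swapped and d|0xc adjusts the leading opcode
@ byte (0xf3 becomes 0xff).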
# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif

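@ ARMv8 Crypto Extensions code path. On entry r0 points at the hash
@ state, r1 at the input and r2 holds the block count; r3 is expected
@ to point #256+32 bytes past K256, which the sub below rewinds to
@ the start of the table.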
.type	sha256_block_data_order_armv8,%function
.align	5
sha256_block_data_order_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

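@ Each iteration of .Loop_v8 compresses one 64-byte block: twelve
@ groups of four rounds update the message schedule with
@ sha256su0/sha256su1 while sha256h/sha256h2 advance the state in
@ q0/q1, followed by four groups with no further schedule update;
@ q14/q15 keep the input state for the final accumulation.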
.align	4
.Loop_v8:
	vld1.8	{q8,q9},[r1]!
	vld1.8	{q10,q11},[r1]!
	vld1.32	{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov	q14,q0	@ offload
	vmov	q15,q1
	teq	r1,r2
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
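@ Message schedule complete: the last four round groups only need
@ sha256h/sha256h2.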
	vld1.32	{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32	{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32	{q13},[r3]
	vadd.i32	q12,q12,q10
	sub	r3,r3,#256-16	@ rewind
	vmov	q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov	q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it	ne
	bne	.Loop_v8

	vst1.32	{q0,q1},[r0]

	bx	lr		@ return
.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
#endif
.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
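@ "SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>"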
.align	2
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.comm	OPENSSL_armcap_P,4,4
.hidden	OPENSSL_armcap_P
#endif
#endif
#endif  // !OPENSSL_NO_ASM
.section	.note.GNU-stack,"",%progbits
