1#if defined(__arm__)
2@ Copyright 2007-2016 The OpenSSL Project Authors. All Rights Reserved.
3@
4@ Licensed under the OpenSSL license (the "License").  You may not use
5@ this file except in compliance with the License.  You can obtain a copy
6@ in the file LICENSE in the source distribution or at
7@ https://www.openssl.org/source/license.html
8
9
10@ ====================================================================
11@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
12@ project. The module is, however, dual licensed under OpenSSL and
13@ CRYPTOGAMS licenses depending on where you obtain it. For further
14@ details see http://www.openssl.org/~appro/cryptogams/.
15@
16@ Permission to use under GPL terms is granted.
17@ ====================================================================
18
19@ SHA256 block procedure for ARMv4. May 2007.
20
21@ Performance is ~2x better than gcc 3.4 generated code and in "absolute"
22@ terms is ~2250 cycles per 64-byte block or ~35 cycles per byte [on
23@ single-issue Xscale PXA250 core].
24
25@ July 2010.
26@
27@ Rescheduling for dual-issue pipeline resulted in 22% improvement on
28@ Cortex A8 core and ~20 cycles per processed byte.
29
30@ February 2011.
31@
32@ Profiler-assisted and platform-specific optimization resulted in 16%
33@ improvement on Cortex A8 core and ~15.4 cycles per processed byte.
34
35@ September 2013.
36@
37@ Add NEON implementation. On Cortex A8 it was measured to process one
38@ byte in 12.5 cycles or 23% faster than integer-only code. Snapdragon
39@ S4 does it in 12.5 cycles too, but it's 50% faster than integer-only
40@ code (meaning that the latter performs sub-optimally; nothing was done
41@ about it).
42
43@ May 2014.
44@
45@ Add ARMv8 code path performing at 2.0 cpb on Apple A7.
46
47#ifndef __KERNEL__
48# include <openssl/arm_arch.h>
49#else
50# define __ARM_ARCH__ __LINUX_ARM_ARCH__
51# define __ARM_MAX_ARCH__ 7
52#endif
53
54.text
55#if defined(__thumb2__)
56.syntax	unified
57.thumb
58#else
59.code	32
60#endif
61
@ K256 -- table of the 64 SHA-256 round constants, one 32-bit word per
@ round, indexed by round number.  The integer rounds in this file read it
@ sequentially through r14 with post-increment (the "*K256++" loads).
@
@ Layout matters: sha256_block_data_order computes the table address as
@ (address of its own entry label) - (256+32), i.e. the 256 bytes of
@ constants plus everything up to the next 32-byte boundary.  Do not add
@ or remove data between K256 and the function without updating that
@ offset.
62.type	K256,%object
63.align	5			@ start the table on a 2^5 = 32-byte boundary
64K256:
65.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5	@ K[ 0.. 3]
66.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5	@ K[ 4.. 7]
67.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3	@ K[ 8..11]
68.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174	@ K[12..15]
69.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc	@ K[16..19]
70.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da	@ K[20..23]
71.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7	@ K[24..27]
72.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967	@ K[28..31]
73.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13	@ K[32..35]
74.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85	@ K[36..39]
75.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3	@ K[40..43]
76.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070	@ K[44..47]
77.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5	@ K[48..51]
78.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3	@ K[52..55]
79.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208	@ K[56..59]
80.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2	@ K[60..63]
81.size	K256,.-K256		@ symbol size = the 64 constants (256 bytes) only
82.word	0				@ terminator: zero word after the table, not counted in .size
83#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
@ Literal word holding the distance from the function entry label to
@ OPENSSL_armcap_P.  The entry code loads this word and uses it as an
@ index off its own address (r3) to fetch the capability word, which it
@ then tests for ARMV8_SHA256 and ARMV7_NEON.  Only assembled for
@ non-kernel builds with __ARM_MAX_ARCH__ >= 7.
84.LOPENSSL_armcap:
85.word	OPENSSL_armcap_P-.Lsha256_block_data_order	@ offset relative to the entry label, not an absolute address
86#endif
@ Pad to the next 32-byte boundary.  With or without the word above, the
@ function entry therefore lands exactly 256+32 bytes after K256, which is
@ the constant the entry code subtracts to find the table.
87.align	5
88
89.globl	sha256_block_data_order
90.hidden	sha256_block_data_order
91.type	sha256_block_data_order,%function
92sha256_block_data_order:
93.Lsha256_block_data_order:
94#if __ARM_ARCH__<7 && !defined(__thumb2__)
95	sub	r3,pc,#8		@ sha256_block_data_order
96#else
97	adr	r3,.Lsha256_block_data_order
98#endif
99#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
100	ldr	r12,.LOPENSSL_armcap
101	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
102#ifdef	__APPLE__
103	ldr	r12,[r12]
104#endif
105	tst	r12,#ARMV8_SHA256
106	bne	.LARMv8
107	tst	r12,#ARMV7_NEON
108	bne	.LNEON
109#endif
110	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
111	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
112	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
113	sub	r14,r3,#256+32	@ K256
114	sub	sp,sp,#16*4		@ alloca(X[16])
115.Loop:
116# if __ARM_ARCH__>=7
117	ldr	r2,[r1],#4
118# else
119	ldrb	r2,[r1,#3]
120# endif
121	eor	r3,r5,r6		@ magic
122	eor	r12,r12,r12
123#if __ARM_ARCH__>=7
124	@ ldr	r2,[r1],#4			@ 0
125# if 0==15
126	str	r1,[sp,#17*4]			@ make room for r1
127# endif
128	eor	r0,r8,r8,ror#5
129	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
130	eor	r0,r0,r8,ror#19	@ Sigma1(e)
131# ifndef __ARMEB__
132	rev	r2,r2
133# endif
134#else
135	@ ldrb	r2,[r1,#3]			@ 0
136	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
137	ldrb	r12,[r1,#2]
138	ldrb	r0,[r1,#1]
139	orr	r2,r2,r12,lsl#8
140	ldrb	r12,[r1],#4
141	orr	r2,r2,r0,lsl#16
142# if 0==15
143	str	r1,[sp,#17*4]			@ make room for r1
144# endif
145	eor	r0,r8,r8,ror#5
146	orr	r2,r2,r12,lsl#24
147	eor	r0,r0,r8,ror#19	@ Sigma1(e)
148#endif
149	ldr	r12,[r14],#4			@ *K256++
150	add	r11,r11,r2			@ h+=X[i]
151	str	r2,[sp,#0*4]
152	eor	r2,r9,r10
153	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
154	and	r2,r2,r8
155	add	r11,r11,r12			@ h+=K256[i]
156	eor	r2,r2,r10			@ Ch(e,f,g)
157	eor	r0,r4,r4,ror#11
158	add	r11,r11,r2			@ h+=Ch(e,f,g)
159#if 0==31
160	and	r12,r12,#0xff
161	cmp	r12,#0xf2			@ done?
162#endif
163#if 0<15
164# if __ARM_ARCH__>=7
165	ldr	r2,[r1],#4			@ prefetch
166# else
167	ldrb	r2,[r1,#3]
168# endif
169	eor	r12,r4,r5			@ a^b, b^c in next round
170#else
171	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
172	eor	r12,r4,r5			@ a^b, b^c in next round
173	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
174#endif
175	eor	r0,r0,r4,ror#20	@ Sigma0(a)
176	and	r3,r3,r12			@ (b^c)&=(a^b)
177	add	r7,r7,r11			@ d+=h
178	eor	r3,r3,r5			@ Maj(a,b,c)
179	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
180	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
181#if __ARM_ARCH__>=7
182	@ ldr	r2,[r1],#4			@ 1
183# if 1==15
184	str	r1,[sp,#17*4]			@ make room for r1
185# endif
186	eor	r0,r7,r7,ror#5
187	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
188	eor	r0,r0,r7,ror#19	@ Sigma1(e)
189# ifndef __ARMEB__
190	rev	r2,r2
191# endif
192#else
193	@ ldrb	r2,[r1,#3]			@ 1
194	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
195	ldrb	r3,[r1,#2]
196	ldrb	r0,[r1,#1]
197	orr	r2,r2,r3,lsl#8
198	ldrb	r3,[r1],#4
199	orr	r2,r2,r0,lsl#16
200# if 1==15
201	str	r1,[sp,#17*4]			@ make room for r1
202# endif
203	eor	r0,r7,r7,ror#5
204	orr	r2,r2,r3,lsl#24
205	eor	r0,r0,r7,ror#19	@ Sigma1(e)
206#endif
207	ldr	r3,[r14],#4			@ *K256++
208	add	r10,r10,r2			@ h+=X[i]
209	str	r2,[sp,#1*4]
210	eor	r2,r8,r9
211	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
212	and	r2,r2,r7
213	add	r10,r10,r3			@ h+=K256[i]
214	eor	r2,r2,r9			@ Ch(e,f,g)
215	eor	r0,r11,r11,ror#11
216	add	r10,r10,r2			@ h+=Ch(e,f,g)
217#if 1==31
218	and	r3,r3,#0xff
219	cmp	r3,#0xf2			@ done?
220#endif
221#if 1<15
222# if __ARM_ARCH__>=7
223	ldr	r2,[r1],#4			@ prefetch
224# else
225	ldrb	r2,[r1,#3]
226# endif
227	eor	r3,r11,r4			@ a^b, b^c in next round
228#else
229	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
230	eor	r3,r11,r4			@ a^b, b^c in next round
231	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
232#endif
233	eor	r0,r0,r11,ror#20	@ Sigma0(a)
234	and	r12,r12,r3			@ (b^c)&=(a^b)
235	add	r6,r6,r10			@ d+=h
236	eor	r12,r12,r4			@ Maj(a,b,c)
237	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
238	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
239#if __ARM_ARCH__>=7
240	@ ldr	r2,[r1],#4			@ 2
241# if 2==15
242	str	r1,[sp,#17*4]			@ make room for r1
243# endif
244	eor	r0,r6,r6,ror#5
245	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
246	eor	r0,r0,r6,ror#19	@ Sigma1(e)
247# ifndef __ARMEB__
248	rev	r2,r2
249# endif
250#else
251	@ ldrb	r2,[r1,#3]			@ 2
252	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
253	ldrb	r12,[r1,#2]
254	ldrb	r0,[r1,#1]
255	orr	r2,r2,r12,lsl#8
256	ldrb	r12,[r1],#4
257	orr	r2,r2,r0,lsl#16
258# if 2==15
259	str	r1,[sp,#17*4]			@ make room for r1
260# endif
261	eor	r0,r6,r6,ror#5
262	orr	r2,r2,r12,lsl#24
263	eor	r0,r0,r6,ror#19	@ Sigma1(e)
264#endif
265	ldr	r12,[r14],#4			@ *K256++
266	add	r9,r9,r2			@ h+=X[i]
267	str	r2,[sp,#2*4]
268	eor	r2,r7,r8
269	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
270	and	r2,r2,r6
271	add	r9,r9,r12			@ h+=K256[i]
272	eor	r2,r2,r8			@ Ch(e,f,g)
273	eor	r0,r10,r10,ror#11
274	add	r9,r9,r2			@ h+=Ch(e,f,g)
275#if 2==31
276	and	r12,r12,#0xff
277	cmp	r12,#0xf2			@ done?
278#endif
279#if 2<15
280# if __ARM_ARCH__>=7
281	ldr	r2,[r1],#4			@ prefetch
282# else
283	ldrb	r2,[r1,#3]
284# endif
285	eor	r12,r10,r11			@ a^b, b^c in next round
286#else
287	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
288	eor	r12,r10,r11			@ a^b, b^c in next round
289	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
290#endif
291	eor	r0,r0,r10,ror#20	@ Sigma0(a)
292	and	r3,r3,r12			@ (b^c)&=(a^b)
293	add	r5,r5,r9			@ d+=h
294	eor	r3,r3,r11			@ Maj(a,b,c)
295	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
296	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
297#if __ARM_ARCH__>=7
298	@ ldr	r2,[r1],#4			@ 3
299# if 3==15
300	str	r1,[sp,#17*4]			@ make room for r1
301# endif
302	eor	r0,r5,r5,ror#5
303	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
304	eor	r0,r0,r5,ror#19	@ Sigma1(e)
305# ifndef __ARMEB__
306	rev	r2,r2
307# endif
308#else
309	@ ldrb	r2,[r1,#3]			@ 3
310	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
311	ldrb	r3,[r1,#2]
312	ldrb	r0,[r1,#1]
313	orr	r2,r2,r3,lsl#8
314	ldrb	r3,[r1],#4
315	orr	r2,r2,r0,lsl#16
316# if 3==15
317	str	r1,[sp,#17*4]			@ make room for r1
318# endif
319	eor	r0,r5,r5,ror#5
320	orr	r2,r2,r3,lsl#24
321	eor	r0,r0,r5,ror#19	@ Sigma1(e)
322#endif
323	ldr	r3,[r14],#4			@ *K256++
324	add	r8,r8,r2			@ h+=X[i]
325	str	r2,[sp,#3*4]
326	eor	r2,r6,r7
327	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
328	and	r2,r2,r5
329	add	r8,r8,r3			@ h+=K256[i]
330	eor	r2,r2,r7			@ Ch(e,f,g)
331	eor	r0,r9,r9,ror#11
332	add	r8,r8,r2			@ h+=Ch(e,f,g)
333#if 3==31
334	and	r3,r3,#0xff
335	cmp	r3,#0xf2			@ done?
336#endif
337#if 3<15
338# if __ARM_ARCH__>=7
339	ldr	r2,[r1],#4			@ prefetch
340# else
341	ldrb	r2,[r1,#3]
342# endif
343	eor	r3,r9,r10			@ a^b, b^c in next round
344#else
345	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
346	eor	r3,r9,r10			@ a^b, b^c in next round
347	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
348#endif
349	eor	r0,r0,r9,ror#20	@ Sigma0(a)
350	and	r12,r12,r3			@ (b^c)&=(a^b)
351	add	r4,r4,r8			@ d+=h
352	eor	r12,r12,r10			@ Maj(a,b,c)
353	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
354	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
355#if __ARM_ARCH__>=7
356	@ ldr	r2,[r1],#4			@ 4
357# if 4==15
358	str	r1,[sp,#17*4]			@ make room for r1
359# endif
360	eor	r0,r4,r4,ror#5
361	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
362	eor	r0,r0,r4,ror#19	@ Sigma1(e)
363# ifndef __ARMEB__
364	rev	r2,r2
365# endif
366#else
367	@ ldrb	r2,[r1,#3]			@ 4
368	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
369	ldrb	r12,[r1,#2]
370	ldrb	r0,[r1,#1]
371	orr	r2,r2,r12,lsl#8
372	ldrb	r12,[r1],#4
373	orr	r2,r2,r0,lsl#16
374# if 4==15
375	str	r1,[sp,#17*4]			@ make room for r1
376# endif
377	eor	r0,r4,r4,ror#5
378	orr	r2,r2,r12,lsl#24
379	eor	r0,r0,r4,ror#19	@ Sigma1(e)
380#endif
381	ldr	r12,[r14],#4			@ *K256++
382	add	r7,r7,r2			@ h+=X[i]
383	str	r2,[sp,#4*4]
384	eor	r2,r5,r6
385	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
386	and	r2,r2,r4
387	add	r7,r7,r12			@ h+=K256[i]
388	eor	r2,r2,r6			@ Ch(e,f,g)
389	eor	r0,r8,r8,ror#11
390	add	r7,r7,r2			@ h+=Ch(e,f,g)
391#if 4==31
392	and	r12,r12,#0xff
393	cmp	r12,#0xf2			@ done?
394#endif
395#if 4<15
396# if __ARM_ARCH__>=7
397	ldr	r2,[r1],#4			@ prefetch
398# else
399	ldrb	r2,[r1,#3]
400# endif
401	eor	r12,r8,r9			@ a^b, b^c in next round
402#else
403	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
404	eor	r12,r8,r9			@ a^b, b^c in next round
405	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
406#endif
407	eor	r0,r0,r8,ror#20	@ Sigma0(a)
408	and	r3,r3,r12			@ (b^c)&=(a^b)
409	add	r11,r11,r7			@ d+=h
410	eor	r3,r3,r9			@ Maj(a,b,c)
411	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
412	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
413#if __ARM_ARCH__>=7
414	@ ldr	r2,[r1],#4			@ 5
415# if 5==15
416	str	r1,[sp,#17*4]			@ make room for r1
417# endif
418	eor	r0,r11,r11,ror#5
419	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
420	eor	r0,r0,r11,ror#19	@ Sigma1(e)
421# ifndef __ARMEB__
422	rev	r2,r2
423# endif
424#else
425	@ ldrb	r2,[r1,#3]			@ 5
426	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
427	ldrb	r3,[r1,#2]
428	ldrb	r0,[r1,#1]
429	orr	r2,r2,r3,lsl#8
430	ldrb	r3,[r1],#4
431	orr	r2,r2,r0,lsl#16
432# if 5==15
433	str	r1,[sp,#17*4]			@ make room for r1
434# endif
435	eor	r0,r11,r11,ror#5
436	orr	r2,r2,r3,lsl#24
437	eor	r0,r0,r11,ror#19	@ Sigma1(e)
438#endif
439	ldr	r3,[r14],#4			@ *K256++
440	add	r6,r6,r2			@ h+=X[i]
441	str	r2,[sp,#5*4]
442	eor	r2,r4,r5
443	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
444	and	r2,r2,r11
445	add	r6,r6,r3			@ h+=K256[i]
446	eor	r2,r2,r5			@ Ch(e,f,g)
447	eor	r0,r7,r7,ror#11
448	add	r6,r6,r2			@ h+=Ch(e,f,g)
449#if 5==31
450	and	r3,r3,#0xff
451	cmp	r3,#0xf2			@ done?
452#endif
453#if 5<15
454# if __ARM_ARCH__>=7
455	ldr	r2,[r1],#4			@ prefetch
456# else
457	ldrb	r2,[r1,#3]
458# endif
459	eor	r3,r7,r8			@ a^b, b^c in next round
460#else
461	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
462	eor	r3,r7,r8			@ a^b, b^c in next round
463	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
464#endif
465	eor	r0,r0,r7,ror#20	@ Sigma0(a)
466	and	r12,r12,r3			@ (b^c)&=(a^b)
467	add	r10,r10,r6			@ d+=h
468	eor	r12,r12,r8			@ Maj(a,b,c)
469	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
470	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
471#if __ARM_ARCH__>=7
472	@ ldr	r2,[r1],#4			@ 6
473# if 6==15
474	str	r1,[sp,#17*4]			@ make room for r1
475# endif
476	eor	r0,r10,r10,ror#5
477	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
478	eor	r0,r0,r10,ror#19	@ Sigma1(e)
479# ifndef __ARMEB__
480	rev	r2,r2
481# endif
482#else
483	@ ldrb	r2,[r1,#3]			@ 6
484	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
485	ldrb	r12,[r1,#2]
486	ldrb	r0,[r1,#1]
487	orr	r2,r2,r12,lsl#8
488	ldrb	r12,[r1],#4
489	orr	r2,r2,r0,lsl#16
490# if 6==15
491	str	r1,[sp,#17*4]			@ make room for r1
492# endif
493	eor	r0,r10,r10,ror#5
494	orr	r2,r2,r12,lsl#24
495	eor	r0,r0,r10,ror#19	@ Sigma1(e)
496#endif
497	ldr	r12,[r14],#4			@ *K256++
498	add	r5,r5,r2			@ h+=X[i]
499	str	r2,[sp,#6*4]
500	eor	r2,r11,r4
501	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
502	and	r2,r2,r10
503	add	r5,r5,r12			@ h+=K256[i]
504	eor	r2,r2,r4			@ Ch(e,f,g)
505	eor	r0,r6,r6,ror#11
506	add	r5,r5,r2			@ h+=Ch(e,f,g)
507#if 6==31
508	and	r12,r12,#0xff
509	cmp	r12,#0xf2			@ done?
510#endif
511#if 6<15
512# if __ARM_ARCH__>=7
513	ldr	r2,[r1],#4			@ prefetch
514# else
515	ldrb	r2,[r1,#3]
516# endif
517	eor	r12,r6,r7			@ a^b, b^c in next round
518#else
519	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
520	eor	r12,r6,r7			@ a^b, b^c in next round
521	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
522#endif
523	eor	r0,r0,r6,ror#20	@ Sigma0(a)
524	and	r3,r3,r12			@ (b^c)&=(a^b)
525	add	r9,r9,r5			@ d+=h
526	eor	r3,r3,r7			@ Maj(a,b,c)
527	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
528	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
529#if __ARM_ARCH__>=7
530	@ ldr	r2,[r1],#4			@ 7
531# if 7==15
532	str	r1,[sp,#17*4]			@ make room for r1
533# endif
534	eor	r0,r9,r9,ror#5
535	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
536	eor	r0,r0,r9,ror#19	@ Sigma1(e)
537# ifndef __ARMEB__
538	rev	r2,r2
539# endif
540#else
541	@ ldrb	r2,[r1,#3]			@ 7
542	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
543	ldrb	r3,[r1,#2]
544	ldrb	r0,[r1,#1]
545	orr	r2,r2,r3,lsl#8
546	ldrb	r3,[r1],#4
547	orr	r2,r2,r0,lsl#16
548# if 7==15
549	str	r1,[sp,#17*4]			@ make room for r1
550# endif
551	eor	r0,r9,r9,ror#5
552	orr	r2,r2,r3,lsl#24
553	eor	r0,r0,r9,ror#19	@ Sigma1(e)
554#endif
555	ldr	r3,[r14],#4			@ *K256++
556	add	r4,r4,r2			@ h+=X[i]
557	str	r2,[sp,#7*4]
558	eor	r2,r10,r11
559	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
560	and	r2,r2,r9
561	add	r4,r4,r3			@ h+=K256[i]
562	eor	r2,r2,r11			@ Ch(e,f,g)
563	eor	r0,r5,r5,ror#11
564	add	r4,r4,r2			@ h+=Ch(e,f,g)
565#if 7==31
566	and	r3,r3,#0xff
567	cmp	r3,#0xf2			@ done?
568#endif
569#if 7<15
570# if __ARM_ARCH__>=7
571	ldr	r2,[r1],#4			@ prefetch
572# else
573	ldrb	r2,[r1,#3]
574# endif
575	eor	r3,r5,r6			@ a^b, b^c in next round
576#else
577	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
578	eor	r3,r5,r6			@ a^b, b^c in next round
579	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
580#endif
581	eor	r0,r0,r5,ror#20	@ Sigma0(a)
582	and	r12,r12,r3			@ (b^c)&=(a^b)
583	add	r8,r8,r4			@ d+=h
584	eor	r12,r12,r6			@ Maj(a,b,c)
585	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
586	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
587#if __ARM_ARCH__>=7
588	@ ldr	r2,[r1],#4			@ 8
589# if 8==15
590	str	r1,[sp,#17*4]			@ make room for r1
591# endif
592	eor	r0,r8,r8,ror#5
593	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
594	eor	r0,r0,r8,ror#19	@ Sigma1(e)
595# ifndef __ARMEB__
596	rev	r2,r2
597# endif
598#else
599	@ ldrb	r2,[r1,#3]			@ 8
600	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
601	ldrb	r12,[r1,#2]
602	ldrb	r0,[r1,#1]
603	orr	r2,r2,r12,lsl#8
604	ldrb	r12,[r1],#4
605	orr	r2,r2,r0,lsl#16
606# if 8==15
607	str	r1,[sp,#17*4]			@ make room for r1
608# endif
609	eor	r0,r8,r8,ror#5
610	orr	r2,r2,r12,lsl#24
611	eor	r0,r0,r8,ror#19	@ Sigma1(e)
612#endif
613	ldr	r12,[r14],#4			@ *K256++
614	add	r11,r11,r2			@ h+=X[i]
615	str	r2,[sp,#8*4]
616	eor	r2,r9,r10
617	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
618	and	r2,r2,r8
619	add	r11,r11,r12			@ h+=K256[i]
620	eor	r2,r2,r10			@ Ch(e,f,g)
621	eor	r0,r4,r4,ror#11
622	add	r11,r11,r2			@ h+=Ch(e,f,g)
623#if 8==31
624	and	r12,r12,#0xff
625	cmp	r12,#0xf2			@ done?
626#endif
627#if 8<15
628# if __ARM_ARCH__>=7
629	ldr	r2,[r1],#4			@ prefetch
630# else
631	ldrb	r2,[r1,#3]
632# endif
633	eor	r12,r4,r5			@ a^b, b^c in next round
634#else
635	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
636	eor	r12,r4,r5			@ a^b, b^c in next round
637	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
638#endif
639	eor	r0,r0,r4,ror#20	@ Sigma0(a)
640	and	r3,r3,r12			@ (b^c)&=(a^b)
641	add	r7,r7,r11			@ d+=h
642	eor	r3,r3,r5			@ Maj(a,b,c)
643	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
644	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
645#if __ARM_ARCH__>=7
646	@ ldr	r2,[r1],#4			@ 9
647# if 9==15
648	str	r1,[sp,#17*4]			@ make room for r1
649# endif
650	eor	r0,r7,r7,ror#5
651	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
652	eor	r0,r0,r7,ror#19	@ Sigma1(e)
653# ifndef __ARMEB__
654	rev	r2,r2
655# endif
656#else
657	@ ldrb	r2,[r1,#3]			@ 9
658	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
659	ldrb	r3,[r1,#2]
660	ldrb	r0,[r1,#1]
661	orr	r2,r2,r3,lsl#8
662	ldrb	r3,[r1],#4
663	orr	r2,r2,r0,lsl#16
664# if 9==15
665	str	r1,[sp,#17*4]			@ make room for r1
666# endif
667	eor	r0,r7,r7,ror#5
668	orr	r2,r2,r3,lsl#24
669	eor	r0,r0,r7,ror#19	@ Sigma1(e)
670#endif
671	ldr	r3,[r14],#4			@ *K256++
672	add	r10,r10,r2			@ h+=X[i]
673	str	r2,[sp,#9*4]
674	eor	r2,r8,r9
675	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
676	and	r2,r2,r7
677	add	r10,r10,r3			@ h+=K256[i]
678	eor	r2,r2,r9			@ Ch(e,f,g)
679	eor	r0,r11,r11,ror#11
680	add	r10,r10,r2			@ h+=Ch(e,f,g)
681#if 9==31
682	and	r3,r3,#0xff
683	cmp	r3,#0xf2			@ done?
684#endif
685#if 9<15
686# if __ARM_ARCH__>=7
687	ldr	r2,[r1],#4			@ prefetch
688# else
689	ldrb	r2,[r1,#3]
690# endif
691	eor	r3,r11,r4			@ a^b, b^c in next round
692#else
693	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
694	eor	r3,r11,r4			@ a^b, b^c in next round
695	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
696#endif
697	eor	r0,r0,r11,ror#20	@ Sigma0(a)
698	and	r12,r12,r3			@ (b^c)&=(a^b)
699	add	r6,r6,r10			@ d+=h
700	eor	r12,r12,r4			@ Maj(a,b,c)
701	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
702	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
703#if __ARM_ARCH__>=7
704	@ ldr	r2,[r1],#4			@ 10
705# if 10==15
706	str	r1,[sp,#17*4]			@ make room for r1
707# endif
708	eor	r0,r6,r6,ror#5
709	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
710	eor	r0,r0,r6,ror#19	@ Sigma1(e)
711# ifndef __ARMEB__
712	rev	r2,r2
713# endif
714#else
715	@ ldrb	r2,[r1,#3]			@ 10
716	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
717	ldrb	r12,[r1,#2]
718	ldrb	r0,[r1,#1]
719	orr	r2,r2,r12,lsl#8
720	ldrb	r12,[r1],#4
721	orr	r2,r2,r0,lsl#16
722# if 10==15
723	str	r1,[sp,#17*4]			@ make room for r1
724# endif
725	eor	r0,r6,r6,ror#5
726	orr	r2,r2,r12,lsl#24
727	eor	r0,r0,r6,ror#19	@ Sigma1(e)
728#endif
729	ldr	r12,[r14],#4			@ *K256++
730	add	r9,r9,r2			@ h+=X[i]
731	str	r2,[sp,#10*4]
732	eor	r2,r7,r8
733	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
734	and	r2,r2,r6
735	add	r9,r9,r12			@ h+=K256[i]
736	eor	r2,r2,r8			@ Ch(e,f,g)
737	eor	r0,r10,r10,ror#11
738	add	r9,r9,r2			@ h+=Ch(e,f,g)
739#if 10==31
740	and	r12,r12,#0xff
741	cmp	r12,#0xf2			@ done?
742#endif
743#if 10<15
744# if __ARM_ARCH__>=7
745	ldr	r2,[r1],#4			@ prefetch
746# else
747	ldrb	r2,[r1,#3]
748# endif
749	eor	r12,r10,r11			@ a^b, b^c in next round
750#else
751	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
752	eor	r12,r10,r11			@ a^b, b^c in next round
753	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
754#endif
755	eor	r0,r0,r10,ror#20	@ Sigma0(a)
756	and	r3,r3,r12			@ (b^c)&=(a^b)
757	add	r5,r5,r9			@ d+=h
758	eor	r3,r3,r11			@ Maj(a,b,c)
759	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
760	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
761#if __ARM_ARCH__>=7
762	@ ldr	r2,[r1],#4			@ 11
763# if 11==15
764	str	r1,[sp,#17*4]			@ make room for r1
765# endif
766	eor	r0,r5,r5,ror#5
767	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
768	eor	r0,r0,r5,ror#19	@ Sigma1(e)
769# ifndef __ARMEB__
770	rev	r2,r2
771# endif
772#else
773	@ ldrb	r2,[r1,#3]			@ 11
774	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
775	ldrb	r3,[r1,#2]
776	ldrb	r0,[r1,#1]
777	orr	r2,r2,r3,lsl#8
778	ldrb	r3,[r1],#4
779	orr	r2,r2,r0,lsl#16
780# if 11==15
781	str	r1,[sp,#17*4]			@ make room for r1
782# endif
783	eor	r0,r5,r5,ror#5
784	orr	r2,r2,r3,lsl#24
785	eor	r0,r0,r5,ror#19	@ Sigma1(e)
786#endif
787	ldr	r3,[r14],#4			@ *K256++
788	add	r8,r8,r2			@ h+=X[i]
789	str	r2,[sp,#11*4]
790	eor	r2,r6,r7
791	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
792	and	r2,r2,r5
793	add	r8,r8,r3			@ h+=K256[i]
794	eor	r2,r2,r7			@ Ch(e,f,g)
795	eor	r0,r9,r9,ror#11
796	add	r8,r8,r2			@ h+=Ch(e,f,g)
797#if 11==31
798	and	r3,r3,#0xff
799	cmp	r3,#0xf2			@ done?
800#endif
801#if 11<15
802# if __ARM_ARCH__>=7
803	ldr	r2,[r1],#4			@ prefetch
804# else
805	ldrb	r2,[r1,#3]
806# endif
807	eor	r3,r9,r10			@ a^b, b^c in next round
808#else
809	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
810	eor	r3,r9,r10			@ a^b, b^c in next round
811	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
812#endif
813	eor	r0,r0,r9,ror#20	@ Sigma0(a)
814	and	r12,r12,r3			@ (b^c)&=(a^b)
815	add	r4,r4,r8			@ d+=h
816	eor	r12,r12,r10			@ Maj(a,b,c)
817	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
818	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
819#if __ARM_ARCH__>=7
820	@ ldr	r2,[r1],#4			@ 12
821# if 12==15
822	str	r1,[sp,#17*4]			@ make room for r1
823# endif
824	eor	r0,r4,r4,ror#5
825	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
826	eor	r0,r0,r4,ror#19	@ Sigma1(e)
827# ifndef __ARMEB__
828	rev	r2,r2
829# endif
830#else
831	@ ldrb	r2,[r1,#3]			@ 12
832	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
833	ldrb	r12,[r1,#2]
834	ldrb	r0,[r1,#1]
835	orr	r2,r2,r12,lsl#8
836	ldrb	r12,[r1],#4
837	orr	r2,r2,r0,lsl#16
838# if 12==15
839	str	r1,[sp,#17*4]			@ make room for r1
840# endif
841	eor	r0,r4,r4,ror#5
842	orr	r2,r2,r12,lsl#24
843	eor	r0,r0,r4,ror#19	@ Sigma1(e)
844#endif
845	ldr	r12,[r14],#4			@ *K256++
846	add	r7,r7,r2			@ h+=X[i]
847	str	r2,[sp,#12*4]
848	eor	r2,r5,r6
849	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
850	and	r2,r2,r4
851	add	r7,r7,r12			@ h+=K256[i]
852	eor	r2,r2,r6			@ Ch(e,f,g)
853	eor	r0,r8,r8,ror#11
854	add	r7,r7,r2			@ h+=Ch(e,f,g)
855#if 12==31
856	and	r12,r12,#0xff
857	cmp	r12,#0xf2			@ done?
858#endif
859#if 12<15
860# if __ARM_ARCH__>=7
861	ldr	r2,[r1],#4			@ prefetch
862# else
863	ldrb	r2,[r1,#3]
864# endif
865	eor	r12,r8,r9			@ a^b, b^c in next round
866#else
867	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
868	eor	r12,r8,r9			@ a^b, b^c in next round
869	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
870#endif
871	eor	r0,r0,r8,ror#20	@ Sigma0(a)
872	and	r3,r3,r12			@ (b^c)&=(a^b)
873	add	r11,r11,r7			@ d+=h
874	eor	r3,r3,r9			@ Maj(a,b,c)
875	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
876	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
877#if __ARM_ARCH__>=7
878	@ ldr	r2,[r1],#4			@ 13
879# if 13==15
880	str	r1,[sp,#17*4]			@ make room for r1
881# endif
882	eor	r0,r11,r11,ror#5
883	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
884	eor	r0,r0,r11,ror#19	@ Sigma1(e)
885# ifndef __ARMEB__
886	rev	r2,r2
887# endif
888#else
889	@ ldrb	r2,[r1,#3]			@ 13
890	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
891	ldrb	r3,[r1,#2]
892	ldrb	r0,[r1,#1]
893	orr	r2,r2,r3,lsl#8
894	ldrb	r3,[r1],#4
895	orr	r2,r2,r0,lsl#16
896# if 13==15
897	str	r1,[sp,#17*4]			@ make room for r1
898# endif
899	eor	r0,r11,r11,ror#5
900	orr	r2,r2,r3,lsl#24
901	eor	r0,r0,r11,ror#19	@ Sigma1(e)
902#endif
903	ldr	r3,[r14],#4			@ *K256++
904	add	r6,r6,r2			@ h+=X[i]
905	str	r2,[sp,#13*4]
906	eor	r2,r4,r5
907	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
908	and	r2,r2,r11
909	add	r6,r6,r3			@ h+=K256[i]
910	eor	r2,r2,r5			@ Ch(e,f,g)
911	eor	r0,r7,r7,ror#11
912	add	r6,r6,r2			@ h+=Ch(e,f,g)
913#if 13==31
914	and	r3,r3,#0xff
915	cmp	r3,#0xf2			@ done?
916#endif
917#if 13<15
918# if __ARM_ARCH__>=7
919	ldr	r2,[r1],#4			@ prefetch
920# else
921	ldrb	r2,[r1,#3]
922# endif
923	eor	r3,r7,r8			@ a^b, b^c in next round
924#else
925	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
926	eor	r3,r7,r8			@ a^b, b^c in next round
927	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
928#endif
929	eor	r0,r0,r7,ror#20	@ Sigma0(a)
930	and	r12,r12,r3			@ (b^c)&=(a^b)
931	add	r10,r10,r6			@ d+=h
932	eor	r12,r12,r8			@ Maj(a,b,c)
933	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
934	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
935#if __ARM_ARCH__>=7
936	@ ldr	r2,[r1],#4			@ 14
937# if 14==15
938	str	r1,[sp,#17*4]			@ make room for r1
939# endif
940	eor	r0,r10,r10,ror#5
941	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
942	eor	r0,r0,r10,ror#19	@ Sigma1(e)
943# ifndef __ARMEB__
944	rev	r2,r2
945# endif
946#else
947	@ ldrb	r2,[r1,#3]			@ 14
948	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
949	ldrb	r12,[r1,#2]
950	ldrb	r0,[r1,#1]
951	orr	r2,r2,r12,lsl#8
952	ldrb	r12,[r1],#4
953	orr	r2,r2,r0,lsl#16
954# if 14==15
955	str	r1,[sp,#17*4]			@ make room for r1
956# endif
957	eor	r0,r10,r10,ror#5
958	orr	r2,r2,r12,lsl#24
959	eor	r0,r0,r10,ror#19	@ Sigma1(e)
960#endif
961	ldr	r12,[r14],#4			@ *K256++
962	add	r5,r5,r2			@ h+=X[i]
963	str	r2,[sp,#14*4]
964	eor	r2,r11,r4
965	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
966	and	r2,r2,r10
967	add	r5,r5,r12			@ h+=K256[i]
968	eor	r2,r2,r4			@ Ch(e,f,g)
969	eor	r0,r6,r6,ror#11
970	add	r5,r5,r2			@ h+=Ch(e,f,g)
971#if 14==31
972	and	r12,r12,#0xff
973	cmp	r12,#0xf2			@ done?
974#endif
975#if 14<15
976# if __ARM_ARCH__>=7
977	ldr	r2,[r1],#4			@ prefetch
978# else
979	ldrb	r2,[r1,#3]
980# endif
981	eor	r12,r6,r7			@ a^b, b^c in next round
982#else
983	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
984	eor	r12,r6,r7			@ a^b, b^c in next round
985	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
986#endif
987	eor	r0,r0,r6,ror#20	@ Sigma0(a)
988	and	r3,r3,r12			@ (b^c)&=(a^b)
989	add	r9,r9,r5			@ d+=h
990	eor	r3,r3,r7			@ Maj(a,b,c)
991	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
992	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
993#if __ARM_ARCH__>=7
994	@ ldr	r2,[r1],#4			@ 15
995# if 15==15
996	str	r1,[sp,#17*4]			@ make room for r1
997# endif
998	eor	r0,r9,r9,ror#5
999	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1000	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1001# ifndef __ARMEB__
1002	rev	r2,r2
1003# endif
1004#else
1005	@ ldrb	r2,[r1,#3]			@ 15
1006	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1007	ldrb	r3,[r1,#2]
1008	ldrb	r0,[r1,#1]
1009	orr	r2,r2,r3,lsl#8
1010	ldrb	r3,[r1],#4
1011	orr	r2,r2,r0,lsl#16
1012# if 15==15
1013	str	r1,[sp,#17*4]			@ make room for r1
1014# endif
1015	eor	r0,r9,r9,ror#5
1016	orr	r2,r2,r3,lsl#24
1017	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1018#endif
1019	ldr	r3,[r14],#4			@ *K256++
1020	add	r4,r4,r2			@ h+=X[i]
1021	str	r2,[sp,#15*4]
1022	eor	r2,r10,r11
1023	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1024	and	r2,r2,r9
1025	add	r4,r4,r3			@ h+=K256[i]
1026	eor	r2,r2,r11			@ Ch(e,f,g)
1027	eor	r0,r5,r5,ror#11
1028	add	r4,r4,r2			@ h+=Ch(e,f,g)
1029#if 15==31
1030	and	r3,r3,#0xff
1031	cmp	r3,#0xf2			@ done?
1032#endif
1033#if 15<15
1034# if __ARM_ARCH__>=7
1035	ldr	r2,[r1],#4			@ prefetch
1036# else
1037	ldrb	r2,[r1,#3]
1038# endif
1039	eor	r3,r5,r6			@ a^b, b^c in next round
1040#else
1041	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1042	eor	r3,r5,r6			@ a^b, b^c in next round
1043	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1044#endif
1045	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1046	and	r12,r12,r3			@ (b^c)&=(a^b)
1047	add	r8,r8,r4			@ d+=h
1048	eor	r12,r12,r6			@ Maj(a,b,c)
1049	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1050	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1051.Lrounds_16_xx:
1052	@ ldr	r2,[sp,#1*4]		@ 16
1053	@ ldr	r1,[sp,#14*4]
1054	mov	r0,r2,ror#7
1055	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1056	mov	r12,r1,ror#17
1057	eor	r0,r0,r2,ror#18
1058	eor	r12,r12,r1,ror#19
1059	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1060	ldr	r2,[sp,#0*4]
1061	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1062	ldr	r1,[sp,#9*4]
1063
1064	add	r12,r12,r0
1065	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1066	add	r2,r2,r12
1067	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1068	add	r2,r2,r1			@ X[i]
1069	ldr	r12,[r14],#4			@ *K256++
1070	add	r11,r11,r2			@ h+=X[i]
1071	str	r2,[sp,#0*4]
1072	eor	r2,r9,r10
1073	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1074	and	r2,r2,r8
1075	add	r11,r11,r12			@ h+=K256[i]
1076	eor	r2,r2,r10			@ Ch(e,f,g)
1077	eor	r0,r4,r4,ror#11
1078	add	r11,r11,r2			@ h+=Ch(e,f,g)
1079#if 16==31
1080	and	r12,r12,#0xff
1081	cmp	r12,#0xf2			@ done?
1082#endif
1083#if 16<15
1084# if __ARM_ARCH__>=7
1085	ldr	r2,[r1],#4			@ prefetch
1086# else
1087	ldrb	r2,[r1,#3]
1088# endif
1089	eor	r12,r4,r5			@ a^b, b^c in next round
1090#else
1091	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1092	eor	r12,r4,r5			@ a^b, b^c in next round
1093	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1094#endif
1095	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1096	and	r3,r3,r12			@ (b^c)&=(a^b)
1097	add	r7,r7,r11			@ d+=h
1098	eor	r3,r3,r5			@ Maj(a,b,c)
1099	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1100	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1101	@ ldr	r2,[sp,#2*4]		@ 17
1102	@ ldr	r1,[sp,#15*4]
1103	mov	r0,r2,ror#7
1104	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1105	mov	r3,r1,ror#17
1106	eor	r0,r0,r2,ror#18
1107	eor	r3,r3,r1,ror#19
1108	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1109	ldr	r2,[sp,#1*4]
1110	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1111	ldr	r1,[sp,#10*4]
1112
1113	add	r3,r3,r0
1114	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1115	add	r2,r2,r3
1116	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1117	add	r2,r2,r1			@ X[i]
1118	ldr	r3,[r14],#4			@ *K256++
1119	add	r10,r10,r2			@ h+=X[i]
1120	str	r2,[sp,#1*4]
1121	eor	r2,r8,r9
1122	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1123	and	r2,r2,r7
1124	add	r10,r10,r3			@ h+=K256[i]
1125	eor	r2,r2,r9			@ Ch(e,f,g)
1126	eor	r0,r11,r11,ror#11
1127	add	r10,r10,r2			@ h+=Ch(e,f,g)
1128#if 17==31
1129	and	r3,r3,#0xff
1130	cmp	r3,#0xf2			@ done?
1131#endif
1132#if 17<15
1133# if __ARM_ARCH__>=7
1134	ldr	r2,[r1],#4			@ prefetch
1135# else
1136	ldrb	r2,[r1,#3]
1137# endif
1138	eor	r3,r11,r4			@ a^b, b^c in next round
1139#else
1140	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1141	eor	r3,r11,r4			@ a^b, b^c in next round
1142	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1143#endif
1144	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1145	and	r12,r12,r3			@ (b^c)&=(a^b)
1146	add	r6,r6,r10			@ d+=h
1147	eor	r12,r12,r4			@ Maj(a,b,c)
1148	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1149	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1150	@ ldr	r2,[sp,#3*4]		@ 18
1151	@ ldr	r1,[sp,#0*4]
1152	mov	r0,r2,ror#7
1153	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1154	mov	r12,r1,ror#17
1155	eor	r0,r0,r2,ror#18
1156	eor	r12,r12,r1,ror#19
1157	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1158	ldr	r2,[sp,#2*4]
1159	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1160	ldr	r1,[sp,#11*4]
1161
1162	add	r12,r12,r0
1163	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1164	add	r2,r2,r12
1165	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1166	add	r2,r2,r1			@ X[i]
1167	ldr	r12,[r14],#4			@ *K256++
1168	add	r9,r9,r2			@ h+=X[i]
1169	str	r2,[sp,#2*4]
1170	eor	r2,r7,r8
1171	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1172	and	r2,r2,r6
1173	add	r9,r9,r12			@ h+=K256[i]
1174	eor	r2,r2,r8			@ Ch(e,f,g)
1175	eor	r0,r10,r10,ror#11
1176	add	r9,r9,r2			@ h+=Ch(e,f,g)
1177#if 18==31
1178	and	r12,r12,#0xff
1179	cmp	r12,#0xf2			@ done?
1180#endif
1181#if 18<15
1182# if __ARM_ARCH__>=7
1183	ldr	r2,[r1],#4			@ prefetch
1184# else
1185	ldrb	r2,[r1,#3]
1186# endif
1187	eor	r12,r10,r11			@ a^b, b^c in next round
1188#else
1189	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1190	eor	r12,r10,r11			@ a^b, b^c in next round
1191	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1192#endif
1193	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1194	and	r3,r3,r12			@ (b^c)&=(a^b)
1195	add	r5,r5,r9			@ d+=h
1196	eor	r3,r3,r11			@ Maj(a,b,c)
1197	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1198	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1199	@ ldr	r2,[sp,#4*4]		@ 19
1200	@ ldr	r1,[sp,#1*4]
1201	mov	r0,r2,ror#7
1202	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1203	mov	r3,r1,ror#17
1204	eor	r0,r0,r2,ror#18
1205	eor	r3,r3,r1,ror#19
1206	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1207	ldr	r2,[sp,#3*4]
1208	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1209	ldr	r1,[sp,#12*4]
1210
1211	add	r3,r3,r0
1212	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1213	add	r2,r2,r3
1214	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1215	add	r2,r2,r1			@ X[i]
1216	ldr	r3,[r14],#4			@ *K256++
1217	add	r8,r8,r2			@ h+=X[i]
1218	str	r2,[sp,#3*4]
1219	eor	r2,r6,r7
1220	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1221	and	r2,r2,r5
1222	add	r8,r8,r3			@ h+=K256[i]
1223	eor	r2,r2,r7			@ Ch(e,f,g)
1224	eor	r0,r9,r9,ror#11
1225	add	r8,r8,r2			@ h+=Ch(e,f,g)
1226#if 19==31
1227	and	r3,r3,#0xff
1228	cmp	r3,#0xf2			@ done?
1229#endif
1230#if 19<15
1231# if __ARM_ARCH__>=7
1232	ldr	r2,[r1],#4			@ prefetch
1233# else
1234	ldrb	r2,[r1,#3]
1235# endif
1236	eor	r3,r9,r10			@ a^b, b^c in next round
1237#else
1238	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1239	eor	r3,r9,r10			@ a^b, b^c in next round
1240	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1241#endif
1242	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1243	and	r12,r12,r3			@ (b^c)&=(a^b)
1244	add	r4,r4,r8			@ d+=h
1245	eor	r12,r12,r10			@ Maj(a,b,c)
1246	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1247	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1248	@ ldr	r2,[sp,#5*4]		@ 20
1249	@ ldr	r1,[sp,#2*4]
1250	mov	r0,r2,ror#7
1251	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1252	mov	r12,r1,ror#17
1253	eor	r0,r0,r2,ror#18
1254	eor	r12,r12,r1,ror#19
1255	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1256	ldr	r2,[sp,#4*4]
1257	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1258	ldr	r1,[sp,#13*4]
1259
1260	add	r12,r12,r0
1261	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1262	add	r2,r2,r12
1263	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1264	add	r2,r2,r1			@ X[i]
1265	ldr	r12,[r14],#4			@ *K256++
1266	add	r7,r7,r2			@ h+=X[i]
1267	str	r2,[sp,#4*4]
1268	eor	r2,r5,r6
1269	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1270	and	r2,r2,r4
1271	add	r7,r7,r12			@ h+=K256[i]
1272	eor	r2,r2,r6			@ Ch(e,f,g)
1273	eor	r0,r8,r8,ror#11
1274	add	r7,r7,r2			@ h+=Ch(e,f,g)
1275#if 20==31
1276	and	r12,r12,#0xff
1277	cmp	r12,#0xf2			@ done?
1278#endif
1279#if 20<15
1280# if __ARM_ARCH__>=7
1281	ldr	r2,[r1],#4			@ prefetch
1282# else
1283	ldrb	r2,[r1,#3]
1284# endif
1285	eor	r12,r8,r9			@ a^b, b^c in next round
1286#else
1287	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1288	eor	r12,r8,r9			@ a^b, b^c in next round
1289	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1290#endif
1291	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1292	and	r3,r3,r12			@ (b^c)&=(a^b)
1293	add	r11,r11,r7			@ d+=h
1294	eor	r3,r3,r9			@ Maj(a,b,c)
1295	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1296	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1297	@ ldr	r2,[sp,#6*4]		@ 21
1298	@ ldr	r1,[sp,#3*4]
1299	mov	r0,r2,ror#7
1300	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1301	mov	r3,r1,ror#17
1302	eor	r0,r0,r2,ror#18
1303	eor	r3,r3,r1,ror#19
1304	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1305	ldr	r2,[sp,#5*4]
1306	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1307	ldr	r1,[sp,#14*4]
1308
1309	add	r3,r3,r0
1310	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1311	add	r2,r2,r3
1312	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1313	add	r2,r2,r1			@ X[i]
1314	ldr	r3,[r14],#4			@ *K256++
1315	add	r6,r6,r2			@ h+=X[i]
1316	str	r2,[sp,#5*4]
1317	eor	r2,r4,r5
1318	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1319	and	r2,r2,r11
1320	add	r6,r6,r3			@ h+=K256[i]
1321	eor	r2,r2,r5			@ Ch(e,f,g)
1322	eor	r0,r7,r7,ror#11
1323	add	r6,r6,r2			@ h+=Ch(e,f,g)
1324#if 21==31
1325	and	r3,r3,#0xff
1326	cmp	r3,#0xf2			@ done?
1327#endif
1328#if 21<15
1329# if __ARM_ARCH__>=7
1330	ldr	r2,[r1],#4			@ prefetch
1331# else
1332	ldrb	r2,[r1,#3]
1333# endif
1334	eor	r3,r7,r8			@ a^b, b^c in next round
1335#else
1336	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1337	eor	r3,r7,r8			@ a^b, b^c in next round
1338	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1339#endif
1340	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1341	and	r12,r12,r3			@ (b^c)&=(a^b)
1342	add	r10,r10,r6			@ d+=h
1343	eor	r12,r12,r8			@ Maj(a,b,c)
1344	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1345	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1346	@ ldr	r2,[sp,#7*4]		@ 22
1347	@ ldr	r1,[sp,#4*4]
1348	mov	r0,r2,ror#7
1349	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1350	mov	r12,r1,ror#17
1351	eor	r0,r0,r2,ror#18
1352	eor	r12,r12,r1,ror#19
1353	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1354	ldr	r2,[sp,#6*4]
1355	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1356	ldr	r1,[sp,#15*4]
1357
1358	add	r12,r12,r0
1359	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1360	add	r2,r2,r12
1361	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1362	add	r2,r2,r1			@ X[i]
1363	ldr	r12,[r14],#4			@ *K256++
1364	add	r5,r5,r2			@ h+=X[i]
1365	str	r2,[sp,#6*4]
1366	eor	r2,r11,r4
1367	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1368	and	r2,r2,r10
1369	add	r5,r5,r12			@ h+=K256[i]
1370	eor	r2,r2,r4			@ Ch(e,f,g)
1371	eor	r0,r6,r6,ror#11
1372	add	r5,r5,r2			@ h+=Ch(e,f,g)
1373#if 22==31
1374	and	r12,r12,#0xff
1375	cmp	r12,#0xf2			@ done?
1376#endif
1377#if 22<15
1378# if __ARM_ARCH__>=7
1379	ldr	r2,[r1],#4			@ prefetch
1380# else
1381	ldrb	r2,[r1,#3]
1382# endif
1383	eor	r12,r6,r7			@ a^b, b^c in next round
1384#else
1385	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1386	eor	r12,r6,r7			@ a^b, b^c in next round
1387	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1388#endif
1389	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1390	and	r3,r3,r12			@ (b^c)&=(a^b)
1391	add	r9,r9,r5			@ d+=h
1392	eor	r3,r3,r7			@ Maj(a,b,c)
1393	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1394	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1395	@ ldr	r2,[sp,#8*4]		@ 23
1396	@ ldr	r1,[sp,#5*4]
1397	mov	r0,r2,ror#7
1398	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1399	mov	r3,r1,ror#17
1400	eor	r0,r0,r2,ror#18
1401	eor	r3,r3,r1,ror#19
1402	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1403	ldr	r2,[sp,#7*4]
1404	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1405	ldr	r1,[sp,#0*4]
1406
1407	add	r3,r3,r0
1408	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1409	add	r2,r2,r3
1410	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1411	add	r2,r2,r1			@ X[i]
1412	ldr	r3,[r14],#4			@ *K256++
1413	add	r4,r4,r2			@ h+=X[i]
1414	str	r2,[sp,#7*4]
1415	eor	r2,r10,r11
1416	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1417	and	r2,r2,r9
1418	add	r4,r4,r3			@ h+=K256[i]
1419	eor	r2,r2,r11			@ Ch(e,f,g)
1420	eor	r0,r5,r5,ror#11
1421	add	r4,r4,r2			@ h+=Ch(e,f,g)
1422#if 23==31
1423	and	r3,r3,#0xff
1424	cmp	r3,#0xf2			@ done?
1425#endif
1426#if 23<15
1427# if __ARM_ARCH__>=7
1428	ldr	r2,[r1],#4			@ prefetch
1429# else
1430	ldrb	r2,[r1,#3]
1431# endif
1432	eor	r3,r5,r6			@ a^b, b^c in next round
1433#else
1434	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1435	eor	r3,r5,r6			@ a^b, b^c in next round
1436	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1437#endif
1438	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1439	and	r12,r12,r3			@ (b^c)&=(a^b)
1440	add	r8,r8,r4			@ d+=h
1441	eor	r12,r12,r6			@ Maj(a,b,c)
1442	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1443	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1444	@ ldr	r2,[sp,#9*4]		@ 24
1445	@ ldr	r1,[sp,#6*4]
1446	mov	r0,r2,ror#7
1447	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1448	mov	r12,r1,ror#17
1449	eor	r0,r0,r2,ror#18
1450	eor	r12,r12,r1,ror#19
1451	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1452	ldr	r2,[sp,#8*4]
1453	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1454	ldr	r1,[sp,#1*4]
1455
1456	add	r12,r12,r0
1457	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1458	add	r2,r2,r12
1459	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1460	add	r2,r2,r1			@ X[i]
1461	ldr	r12,[r14],#4			@ *K256++
1462	add	r11,r11,r2			@ h+=X[i]
1463	str	r2,[sp,#8*4]
1464	eor	r2,r9,r10
1465	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1466	and	r2,r2,r8
1467	add	r11,r11,r12			@ h+=K256[i]
1468	eor	r2,r2,r10			@ Ch(e,f,g)
1469	eor	r0,r4,r4,ror#11
1470	add	r11,r11,r2			@ h+=Ch(e,f,g)
1471#if 24==31
1472	and	r12,r12,#0xff
1473	cmp	r12,#0xf2			@ done?
1474#endif
1475#if 24<15
1476# if __ARM_ARCH__>=7
1477	ldr	r2,[r1],#4			@ prefetch
1478# else
1479	ldrb	r2,[r1,#3]
1480# endif
1481	eor	r12,r4,r5			@ a^b, b^c in next round
1482#else
1483	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1484	eor	r12,r4,r5			@ a^b, b^c in next round
1485	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1486#endif
1487	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1488	and	r3,r3,r12			@ (b^c)&=(a^b)
1489	add	r7,r7,r11			@ d+=h
1490	eor	r3,r3,r5			@ Maj(a,b,c)
1491	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1492	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1493	@ ldr	r2,[sp,#10*4]		@ 25
1494	@ ldr	r1,[sp,#7*4]
1495	mov	r0,r2,ror#7
1496	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1497	mov	r3,r1,ror#17
1498	eor	r0,r0,r2,ror#18
1499	eor	r3,r3,r1,ror#19
1500	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1501	ldr	r2,[sp,#9*4]
1502	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1503	ldr	r1,[sp,#2*4]
1504
1505	add	r3,r3,r0
1506	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1507	add	r2,r2,r3
1508	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1509	add	r2,r2,r1			@ X[i]
1510	ldr	r3,[r14],#4			@ *K256++
1511	add	r10,r10,r2			@ h+=X[i]
1512	str	r2,[sp,#9*4]
1513	eor	r2,r8,r9
1514	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1515	and	r2,r2,r7
1516	add	r10,r10,r3			@ h+=K256[i]
1517	eor	r2,r2,r9			@ Ch(e,f,g)
1518	eor	r0,r11,r11,ror#11
1519	add	r10,r10,r2			@ h+=Ch(e,f,g)
1520#if 25==31
1521	and	r3,r3,#0xff
1522	cmp	r3,#0xf2			@ done?
1523#endif
1524#if 25<15
1525# if __ARM_ARCH__>=7
1526	ldr	r2,[r1],#4			@ prefetch
1527# else
1528	ldrb	r2,[r1,#3]
1529# endif
1530	eor	r3,r11,r4			@ a^b, b^c in next round
1531#else
1532	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1533	eor	r3,r11,r4			@ a^b, b^c in next round
1534	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1535#endif
1536	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1537	and	r12,r12,r3			@ (b^c)&=(a^b)
1538	add	r6,r6,r10			@ d+=h
1539	eor	r12,r12,r4			@ Maj(a,b,c)
1540	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1541	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1542	@ ldr	r2,[sp,#11*4]		@ 26
1543	@ ldr	r1,[sp,#8*4]
1544	mov	r0,r2,ror#7
1545	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1546	mov	r12,r1,ror#17
1547	eor	r0,r0,r2,ror#18
1548	eor	r12,r12,r1,ror#19
1549	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1550	ldr	r2,[sp,#10*4]
1551	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1552	ldr	r1,[sp,#3*4]
1553
1554	add	r12,r12,r0
1555	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1556	add	r2,r2,r12
1557	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1558	add	r2,r2,r1			@ X[i]
1559	ldr	r12,[r14],#4			@ *K256++
1560	add	r9,r9,r2			@ h+=X[i]
1561	str	r2,[sp,#10*4]
1562	eor	r2,r7,r8
1563	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1564	and	r2,r2,r6
1565	add	r9,r9,r12			@ h+=K256[i]
1566	eor	r2,r2,r8			@ Ch(e,f,g)
1567	eor	r0,r10,r10,ror#11
1568	add	r9,r9,r2			@ h+=Ch(e,f,g)
1569#if 26==31
1570	and	r12,r12,#0xff
1571	cmp	r12,#0xf2			@ done?
1572#endif
1573#if 26<15
1574# if __ARM_ARCH__>=7
1575	ldr	r2,[r1],#4			@ prefetch
1576# else
1577	ldrb	r2,[r1,#3]
1578# endif
1579	eor	r12,r10,r11			@ a^b, b^c in next round
1580#else
1581	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1582	eor	r12,r10,r11			@ a^b, b^c in next round
1583	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1584#endif
1585	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1586	and	r3,r3,r12			@ (b^c)&=(a^b)
1587	add	r5,r5,r9			@ d+=h
1588	eor	r3,r3,r11			@ Maj(a,b,c)
1589	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1590	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1591	@ ldr	r2,[sp,#12*4]		@ 27
1592	@ ldr	r1,[sp,#9*4]
1593	mov	r0,r2,ror#7
1594	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1595	mov	r3,r1,ror#17
1596	eor	r0,r0,r2,ror#18
1597	eor	r3,r3,r1,ror#19
1598	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1599	ldr	r2,[sp,#11*4]
1600	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1601	ldr	r1,[sp,#4*4]
1602
1603	add	r3,r3,r0
1604	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1605	add	r2,r2,r3
1606	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1607	add	r2,r2,r1			@ X[i]
1608	ldr	r3,[r14],#4			@ *K256++
1609	add	r8,r8,r2			@ h+=X[i]
1610	str	r2,[sp,#11*4]
1611	eor	r2,r6,r7
1612	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1613	and	r2,r2,r5
1614	add	r8,r8,r3			@ h+=K256[i]
1615	eor	r2,r2,r7			@ Ch(e,f,g)
1616	eor	r0,r9,r9,ror#11
1617	add	r8,r8,r2			@ h+=Ch(e,f,g)
1618#if 27==31
1619	and	r3,r3,#0xff
1620	cmp	r3,#0xf2			@ done?
1621#endif
1622#if 27<15
1623# if __ARM_ARCH__>=7
1624	ldr	r2,[r1],#4			@ prefetch
1625# else
1626	ldrb	r2,[r1,#3]
1627# endif
1628	eor	r3,r9,r10			@ a^b, b^c in next round
1629#else
1630	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1631	eor	r3,r9,r10			@ a^b, b^c in next round
1632	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1633#endif
1634	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1635	and	r12,r12,r3			@ (b^c)&=(a^b)
1636	add	r4,r4,r8			@ d+=h
1637	eor	r12,r12,r10			@ Maj(a,b,c)
1638	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1639	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1640	@ ldr	r2,[sp,#13*4]		@ 28
1641	@ ldr	r1,[sp,#10*4]
1642	mov	r0,r2,ror#7
1643	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1644	mov	r12,r1,ror#17
1645	eor	r0,r0,r2,ror#18
1646	eor	r12,r12,r1,ror#19
1647	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1648	ldr	r2,[sp,#12*4]
1649	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1650	ldr	r1,[sp,#5*4]
1651
1652	add	r12,r12,r0
1653	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1654	add	r2,r2,r12
1655	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1656	add	r2,r2,r1			@ X[i]
1657	ldr	r12,[r14],#4			@ *K256++
1658	add	r7,r7,r2			@ h+=X[i]
1659	str	r2,[sp,#12*4]
1660	eor	r2,r5,r6
1661	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1662	and	r2,r2,r4
1663	add	r7,r7,r12			@ h+=K256[i]
1664	eor	r2,r2,r6			@ Ch(e,f,g)
1665	eor	r0,r8,r8,ror#11
1666	add	r7,r7,r2			@ h+=Ch(e,f,g)
1667#if 28==31
1668	and	r12,r12,#0xff
1669	cmp	r12,#0xf2			@ done?
1670#endif
1671#if 28<15
1672# if __ARM_ARCH__>=7
1673	ldr	r2,[r1],#4			@ prefetch
1674# else
1675	ldrb	r2,[r1,#3]
1676# endif
1677	eor	r12,r8,r9			@ a^b, b^c in next round
1678#else
1679	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1680	eor	r12,r8,r9			@ a^b, b^c in next round
1681	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1682#endif
1683	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1684	and	r3,r3,r12			@ (b^c)&=(a^b)
1685	add	r11,r11,r7			@ d+=h
1686	eor	r3,r3,r9			@ Maj(a,b,c)
1687	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1688	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1689	@ ldr	r2,[sp,#14*4]		@ 29
1690	@ ldr	r1,[sp,#11*4]
1691	mov	r0,r2,ror#7
1692	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1693	mov	r3,r1,ror#17
1694	eor	r0,r0,r2,ror#18
1695	eor	r3,r3,r1,ror#19
1696	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1697	ldr	r2,[sp,#13*4]
1698	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1699	ldr	r1,[sp,#6*4]
1700
1701	add	r3,r3,r0
1702	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1703	add	r2,r2,r3
1704	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1705	add	r2,r2,r1			@ X[i]
1706	ldr	r3,[r14],#4			@ *K256++
1707	add	r6,r6,r2			@ h+=X[i]
1708	str	r2,[sp,#13*4]
1709	eor	r2,r4,r5
1710	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1711	and	r2,r2,r11
1712	add	r6,r6,r3			@ h+=K256[i]
1713	eor	r2,r2,r5			@ Ch(e,f,g)
1714	eor	r0,r7,r7,ror#11
1715	add	r6,r6,r2			@ h+=Ch(e,f,g)
1716#if 29==31
1717	and	r3,r3,#0xff
1718	cmp	r3,#0xf2			@ done?
1719#endif
1720#if 29<15
1721# if __ARM_ARCH__>=7
1722	ldr	r2,[r1],#4			@ prefetch
1723# else
1724	ldrb	r2,[r1,#3]
1725# endif
1726	eor	r3,r7,r8			@ a^b, b^c in next round
1727#else
1728	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1729	eor	r3,r7,r8			@ a^b, b^c in next round
1730	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1731#endif
1732	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1733	and	r12,r12,r3			@ (b^c)&=(a^b)
1734	add	r10,r10,r6			@ d+=h
1735	eor	r12,r12,r8			@ Maj(a,b,c)
1736	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1737	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1738	@ ldr	r2,[sp,#15*4]		@ 30
1739	@ ldr	r1,[sp,#12*4]
1740	mov	r0,r2,ror#7
1741	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1742	mov	r12,r1,ror#17
1743	eor	r0,r0,r2,ror#18
1744	eor	r12,r12,r1,ror#19
1745	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1746	ldr	r2,[sp,#14*4]
1747	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1748	ldr	r1,[sp,#7*4]
1749
1750	add	r12,r12,r0
1751	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1752	add	r2,r2,r12
1753	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1754	add	r2,r2,r1			@ X[i]
1755	ldr	r12,[r14],#4			@ *K256++
1756	add	r5,r5,r2			@ h+=X[i]
1757	str	r2,[sp,#14*4]
1758	eor	r2,r11,r4
1759	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1760	and	r2,r2,r10
1761	add	r5,r5,r12			@ h+=K256[i]
1762	eor	r2,r2,r4			@ Ch(e,f,g)
1763	eor	r0,r6,r6,ror#11
1764	add	r5,r5,r2			@ h+=Ch(e,f,g)
1765#if 30==31
1766	and	r12,r12,#0xff
1767	cmp	r12,#0xf2			@ done?
1768#endif
1769#if 30<15
1770# if __ARM_ARCH__>=7
1771	ldr	r2,[r1],#4			@ prefetch
1772# else
1773	ldrb	r2,[r1,#3]
1774# endif
1775	eor	r12,r6,r7			@ a^b, b^c in next round
1776#else
1777	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1778	eor	r12,r6,r7			@ a^b, b^c in next round
1779	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1780#endif
1781	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1782	and	r3,r3,r12			@ (b^c)&=(a^b)
1783	add	r9,r9,r5			@ d+=h
1784	eor	r3,r3,r7			@ Maj(a,b,c)
1785	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1786	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1787	@ ldr	r2,[sp,#0*4]		@ 31
1788	@ ldr	r1,[sp,#13*4]
1789	mov	r0,r2,ror#7
1790	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1791	mov	r3,r1,ror#17
1792	eor	r0,r0,r2,ror#18
1793	eor	r3,r3,r1,ror#19
1794	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1795	ldr	r2,[sp,#15*4]
1796	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1797	ldr	r1,[sp,#8*4]
1798
1799	add	r3,r3,r0
1800	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1801	add	r2,r2,r3
1802	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1803	add	r2,r2,r1			@ X[i]
1804	ldr	r3,[r14],#4			@ *K256++
1805	add	r4,r4,r2			@ h+=X[i]
1806	str	r2,[sp,#15*4]
1807	eor	r2,r10,r11
1808	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1809	and	r2,r2,r9
1810	add	r4,r4,r3			@ h+=K256[i]
1811	eor	r2,r2,r11			@ Ch(e,f,g)
1812	eor	r0,r5,r5,ror#11
1813	add	r4,r4,r2			@ h+=Ch(e,f,g)
1814#if 31==31
1815	and	r3,r3,#0xff
1816	cmp	r3,#0xf2			@ done?
1817#endif
1818#if 31<15
1819# if __ARM_ARCH__>=7
1820	ldr	r2,[r1],#4			@ prefetch
1821# else
1822	ldrb	r2,[r1,#3]
1823# endif
1824	eor	r3,r5,r6			@ a^b, b^c in next round
1825#else
1826	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1827	eor	r3,r5,r6			@ a^b, b^c in next round
1828	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1829#endif
1830	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1831	and	r12,r12,r3			@ (b^c)&=(a^b)
1832	add	r8,r8,r4			@ d+=h
1833	eor	r12,r12,r6			@ Maj(a,b,c)
1834	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1835	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1836#if __ARM_ARCH__>=7
1837	ite	eq			@ Thumb2 thing, sanity check in ARM
1838#endif
1839	ldreq	r3,[sp,#16*4]		@ pull ctx
1840	bne	.Lrounds_16_xx
1841
1842	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1843	ldr	r0,[r3,#0]
1844	ldr	r2,[r3,#4]
1845	ldr	r12,[r3,#8]
1846	add	r4,r4,r0
1847	ldr	r0,[r3,#12]
1848	add	r5,r5,r2
1849	ldr	r2,[r3,#16]
1850	add	r6,r6,r12
1851	ldr	r12,[r3,#20]
1852	add	r7,r7,r0
1853	ldr	r0,[r3,#24]
1854	add	r8,r8,r2
1855	ldr	r2,[r3,#28]
1856	add	r9,r9,r12
1857	ldr	r1,[sp,#17*4]		@ pull inp
1858	ldr	r12,[sp,#18*4]		@ pull inp+len
1859	add	r10,r10,r0
1860	add	r11,r11,r2
1861	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1862	cmp	r1,r12
1863	sub	r14,r14,#256	@ rewind Ktbl
1864	bne	.Loop
1865
1866	add	sp,sp,#19*4	@ destroy frame
1867#if __ARM_ARCH__>=5
1868	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,pc}
1869#else
1870	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,lr}
1871	tst	lr,#1
1872	moveq	pc,lr			@ be binary compatible with V4, yet
1873.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1874#endif
1875.size	sha256_block_data_order,.-sha256_block_data_order
1876#if __ARM_MAX_ARCH__>=7
1877.arch	armv7-a
1878.fpu	neon
1879
1880.globl	sha256_block_data_order_neon
1881.hidden	sha256_block_data_order_neon
1882.type	sha256_block_data_order_neon,%function
1883.align	5
1884.skip	16
1885sha256_block_data_order_neon:
1886.LNEON:
1887	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
1888
1889	sub	r11,sp,#16*4+16
1890	adr	r14,K256
1891	bic	r11,r11,#15		@ align for 128-bit stores
1892	mov	r12,sp
1893	mov	sp,r11			@ alloca
1894	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1895
1896	vld1.8	{q0},[r1]!
1897	vld1.8	{q1},[r1]!
1898	vld1.8	{q2},[r1]!
1899	vld1.8	{q3},[r1]!
1900	vld1.32	{q8},[r14,:128]!
1901	vld1.32	{q9},[r14,:128]!
1902	vld1.32	{q10},[r14,:128]!
1903	vld1.32	{q11},[r14,:128]!
1904	vrev32.8	q0,q0		@ yes, even on
1905	str	r0,[sp,#64]
1906	vrev32.8	q1,q1		@ big-endian
1907	str	r1,[sp,#68]
1908	mov	r1,sp
1909	vrev32.8	q2,q2
1910	str	r2,[sp,#72]
1911	vrev32.8	q3,q3
1912	str	r12,[sp,#76]		@ save original sp
1913	vadd.i32	q8,q8,q0
1914	vadd.i32	q9,q9,q1
1915	vst1.32	{q8},[r1,:128]!
1916	vadd.i32	q10,q10,q2
1917	vst1.32	{q9},[r1,:128]!
1918	vadd.i32	q11,q11,q3
1919	vst1.32	{q10},[r1,:128]!
1920	vst1.32	{q11},[r1,:128]!
1921
1922	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
1923	sub	r1,r1,#64
1924	ldr	r2,[sp,#0]
1925	eor	r12,r12,r12
1926	eor	r3,r5,r6
1927	b	.L_00_48
1928
1929.align	4
1930.L_00_48:
1931	vext.8	q8,q0,q1,#4
1932	add	r11,r11,r2
1933	eor	r2,r9,r10
1934	eor	r0,r8,r8,ror#5
1935	vext.8	q9,q2,q3,#4
1936	add	r4,r4,r12
1937	and	r2,r2,r8
1938	eor	r12,r0,r8,ror#19
1939	vshr.u32	q10,q8,#7
1940	eor	r0,r4,r4,ror#11
1941	eor	r2,r2,r10
1942	vadd.i32	q0,q0,q9
1943	add	r11,r11,r12,ror#6
1944	eor	r12,r4,r5
1945	vshr.u32	q9,q8,#3
1946	eor	r0,r0,r4,ror#20
1947	add	r11,r11,r2
1948	vsli.32	q10,q8,#25
1949	ldr	r2,[sp,#4]
1950	and	r3,r3,r12
1951	vshr.u32	q11,q8,#18
1952	add	r7,r7,r11
1953	add	r11,r11,r0,ror#2
1954	eor	r3,r3,r5
1955	veor	q9,q9,q10
1956	add	r10,r10,r2
1957	vsli.32	q11,q8,#14
1958	eor	r2,r8,r9
1959	eor	r0,r7,r7,ror#5
1960	vshr.u32	d24,d7,#17
1961	add	r11,r11,r3
1962	and	r2,r2,r7
1963	veor	q9,q9,q11
1964	eor	r3,r0,r7,ror#19
1965	eor	r0,r11,r11,ror#11
1966	vsli.32	d24,d7,#15
1967	eor	r2,r2,r9
1968	add	r10,r10,r3,ror#6
1969	vshr.u32	d25,d7,#10
1970	eor	r3,r11,r4
1971	eor	r0,r0,r11,ror#20
1972	vadd.i32	q0,q0,q9
1973	add	r10,r10,r2
1974	ldr	r2,[sp,#8]
1975	veor	d25,d25,d24
1976	and	r12,r12,r3
1977	add	r6,r6,r10
1978	vshr.u32	d24,d7,#19
1979	add	r10,r10,r0,ror#2
1980	eor	r12,r12,r4
1981	vsli.32	d24,d7,#13
1982	add	r9,r9,r2
1983	eor	r2,r7,r8
1984	veor	d25,d25,d24
1985	eor	r0,r6,r6,ror#5
1986	add	r10,r10,r12
1987	vadd.i32	d0,d0,d25
1988	and	r2,r2,r6
1989	eor	r12,r0,r6,ror#19
1990	vshr.u32	d24,d0,#17
1991	eor	r0,r10,r10,ror#11
1992	eor	r2,r2,r8
1993	vsli.32	d24,d0,#15
1994	add	r9,r9,r12,ror#6
1995	eor	r12,r10,r11
1996	vshr.u32	d25,d0,#10
1997	eor	r0,r0,r10,ror#20
1998	add	r9,r9,r2
1999	veor	d25,d25,d24
2000	ldr	r2,[sp,#12]
2001	and	r3,r3,r12
2002	vshr.u32	d24,d0,#19
2003	add	r5,r5,r9
2004	add	r9,r9,r0,ror#2
2005	eor	r3,r3,r11
2006	vld1.32	{q8},[r14,:128]!
2007	add	r8,r8,r2
2008	vsli.32	d24,d0,#13
2009	eor	r2,r6,r7
2010	eor	r0,r5,r5,ror#5
2011	veor	d25,d25,d24
2012	add	r9,r9,r3
2013	and	r2,r2,r5
2014	vadd.i32	d1,d1,d25
2015	eor	r3,r0,r5,ror#19
2016	eor	r0,r9,r9,ror#11
2017	vadd.i32	q8,q8,q0
2018	eor	r2,r2,r7
2019	add	r8,r8,r3,ror#6
2020	eor	r3,r9,r10
2021	eor	r0,r0,r9,ror#20
2022	add	r8,r8,r2
2023	ldr	r2,[sp,#16]
2024	and	r12,r12,r3
2025	add	r4,r4,r8
2026	vst1.32	{q8},[r1,:128]!
2027	add	r8,r8,r0,ror#2
2028	eor	r12,r12,r10
2029	vext.8	q8,q1,q2,#4
2030	add	r7,r7,r2
2031	eor	r2,r5,r6
2032	eor	r0,r4,r4,ror#5
2033	vext.8	q9,q3,q0,#4
2034	add	r8,r8,r12
2035	and	r2,r2,r4
2036	eor	r12,r0,r4,ror#19
2037	vshr.u32	q10,q8,#7
2038	eor	r0,r8,r8,ror#11
2039	eor	r2,r2,r6
2040	vadd.i32	q1,q1,q9
2041	add	r7,r7,r12,ror#6
2042	eor	r12,r8,r9
2043	vshr.u32	q9,q8,#3
2044	eor	r0,r0,r8,ror#20
2045	add	r7,r7,r2
2046	vsli.32	q10,q8,#25
2047	ldr	r2,[sp,#20]
2048	and	r3,r3,r12
2049	vshr.u32	q11,q8,#18
2050	add	r11,r11,r7
2051	add	r7,r7,r0,ror#2
2052	eor	r3,r3,r9
2053	veor	q9,q9,q10
2054	add	r6,r6,r2
2055	vsli.32	q11,q8,#14
2056	eor	r2,r4,r5
2057	eor	r0,r11,r11,ror#5
2058	vshr.u32	d24,d1,#17
2059	add	r7,r7,r3
2060	and	r2,r2,r11
2061	veor	q9,q9,q11
2062	eor	r3,r0,r11,ror#19
2063	eor	r0,r7,r7,ror#11
2064	vsli.32	d24,d1,#15
2065	eor	r2,r2,r5
2066	add	r6,r6,r3,ror#6
2067	vshr.u32	d25,d1,#10
2068	eor	r3,r7,r8
2069	eor	r0,r0,r7,ror#20
2070	vadd.i32	q1,q1,q9
2071	add	r6,r6,r2
2072	ldr	r2,[sp,#24]
2073	veor	d25,d25,d24
2074	and	r12,r12,r3
2075	add	r10,r10,r6
2076	vshr.u32	d24,d1,#19
2077	add	r6,r6,r0,ror#2
2078	eor	r12,r12,r8
2079	vsli.32	d24,d1,#13
2080	add	r5,r5,r2
2081	eor	r2,r11,r4
2082	veor	d25,d25,d24
2083	eor	r0,r10,r10,ror#5
2084	add	r6,r6,r12
2085	vadd.i32	d2,d2,d25
2086	and	r2,r2,r10
2087	eor	r12,r0,r10,ror#19
2088	vshr.u32	d24,d2,#17
2089	eor	r0,r6,r6,ror#11
2090	eor	r2,r2,r4
2091	vsli.32	d24,d2,#15
2092	add	r5,r5,r12,ror#6
2093	eor	r12,r6,r7
2094	vshr.u32	d25,d2,#10
2095	eor	r0,r0,r6,ror#20
2096	add	r5,r5,r2
2097	veor	d25,d25,d24
2098	ldr	r2,[sp,#28]
2099	and	r3,r3,r12
2100	vshr.u32	d24,d2,#19
2101	add	r9,r9,r5
2102	add	r5,r5,r0,ror#2
2103	eor	r3,r3,r7
2104	vld1.32	{q8},[r14,:128]!
2105	add	r4,r4,r2
2106	vsli.32	d24,d2,#13
2107	eor	r2,r10,r11
2108	eor	r0,r9,r9,ror#5
2109	veor	d25,d25,d24
2110	add	r5,r5,r3
2111	and	r2,r2,r9
2112	vadd.i32	d3,d3,d25
2113	eor	r3,r0,r9,ror#19
2114	eor	r0,r5,r5,ror#11
2115	vadd.i32	q8,q8,q1
2116	eor	r2,r2,r11
2117	add	r4,r4,r3,ror#6
2118	eor	r3,r5,r6
2119	eor	r0,r0,r5,ror#20
2120	add	r4,r4,r2
2121	ldr	r2,[sp,#32]
2122	and	r12,r12,r3
2123	add	r8,r8,r4
2124	vst1.32	{q8},[r1,:128]!
2125	add	r4,r4,r0,ror#2
2126	eor	r12,r12,r6
2127	vext.8	q8,q2,q3,#4
2128	add	r11,r11,r2
2129	eor	r2,r9,r10
2130	eor	r0,r8,r8,ror#5
2131	vext.8	q9,q0,q1,#4
2132	add	r4,r4,r12
2133	and	r2,r2,r8
2134	eor	r12,r0,r8,ror#19
2135	vshr.u32	q10,q8,#7
2136	eor	r0,r4,r4,ror#11
2137	eor	r2,r2,r10
2138	vadd.i32	q2,q2,q9
2139	add	r11,r11,r12,ror#6
2140	eor	r12,r4,r5
2141	vshr.u32	q9,q8,#3
2142	eor	r0,r0,r4,ror#20
2143	add	r11,r11,r2
2144	vsli.32	q10,q8,#25
2145	ldr	r2,[sp,#36]
2146	and	r3,r3,r12
2147	vshr.u32	q11,q8,#18
2148	add	r7,r7,r11
2149	add	r11,r11,r0,ror#2
2150	eor	r3,r3,r5
2151	veor	q9,q9,q10
2152	add	r10,r10,r2
2153	vsli.32	q11,q8,#14
2154	eor	r2,r8,r9
2155	eor	r0,r7,r7,ror#5
2156	vshr.u32	d24,d3,#17
2157	add	r11,r11,r3
2158	and	r2,r2,r7
2159	veor	q9,q9,q11
2160	eor	r3,r0,r7,ror#19
2161	eor	r0,r11,r11,ror#11
2162	vsli.32	d24,d3,#15
2163	eor	r2,r2,r9
2164	add	r10,r10,r3,ror#6
2165	vshr.u32	d25,d3,#10
2166	eor	r3,r11,r4
2167	eor	r0,r0,r11,ror#20
2168	vadd.i32	q2,q2,q9
2169	add	r10,r10,r2
2170	ldr	r2,[sp,#40]
2171	veor	d25,d25,d24
2172	and	r12,r12,r3
2173	add	r6,r6,r10
2174	vshr.u32	d24,d3,#19
2175	add	r10,r10,r0,ror#2
2176	eor	r12,r12,r4
2177	vsli.32	d24,d3,#13
2178	add	r9,r9,r2
2179	eor	r2,r7,r8
2180	veor	d25,d25,d24
2181	eor	r0,r6,r6,ror#5
2182	add	r10,r10,r12
2183	vadd.i32	d4,d4,d25
2184	and	r2,r2,r6
2185	eor	r12,r0,r6,ror#19
2186	vshr.u32	d24,d4,#17
2187	eor	r0,r10,r10,ror#11
2188	eor	r2,r2,r8
2189	vsli.32	d24,d4,#15
2190	add	r9,r9,r12,ror#6
2191	eor	r12,r10,r11
2192	vshr.u32	d25,d4,#10
2193	eor	r0,r0,r10,ror#20
2194	add	r9,r9,r2
2195	veor	d25,d25,d24
2196	ldr	r2,[sp,#44]
2197	and	r3,r3,r12
2198	vshr.u32	d24,d4,#19
2199	add	r5,r5,r9
2200	add	r9,r9,r0,ror#2
2201	eor	r3,r3,r11
2202	vld1.32	{q8},[r14,:128]!
2203	add	r8,r8,r2
2204	vsli.32	d24,d4,#13
2205	eor	r2,r6,r7
2206	eor	r0,r5,r5,ror#5
2207	veor	d25,d25,d24
2208	add	r9,r9,r3
2209	and	r2,r2,r5
2210	vadd.i32	d5,d5,d25
2211	eor	r3,r0,r5,ror#19
2212	eor	r0,r9,r9,ror#11
2213	vadd.i32	q8,q8,q2
2214	eor	r2,r2,r7
2215	add	r8,r8,r3,ror#6
2216	eor	r3,r9,r10
2217	eor	r0,r0,r9,ror#20
2218	add	r8,r8,r2
2219	ldr	r2,[sp,#48]
2220	and	r12,r12,r3
2221	add	r4,r4,r8
2222	vst1.32	{q8},[r1,:128]!
2223	add	r8,r8,r0,ror#2
2224	eor	r12,r12,r10
2225	vext.8	q8,q3,q0,#4
2226	add	r7,r7,r2
2227	eor	r2,r5,r6
2228	eor	r0,r4,r4,ror#5
2229	vext.8	q9,q1,q2,#4
2230	add	r8,r8,r12
2231	and	r2,r2,r4
2232	eor	r12,r0,r4,ror#19
2233	vshr.u32	q10,q8,#7
2234	eor	r0,r8,r8,ror#11
2235	eor	r2,r2,r6
2236	vadd.i32	q3,q3,q9
2237	add	r7,r7,r12,ror#6
2238	eor	r12,r8,r9
2239	vshr.u32	q9,q8,#3
2240	eor	r0,r0,r8,ror#20
2241	add	r7,r7,r2
2242	vsli.32	q10,q8,#25
2243	ldr	r2,[sp,#52]
2244	and	r3,r3,r12
2245	vshr.u32	q11,q8,#18
2246	add	r11,r11,r7
2247	add	r7,r7,r0,ror#2
2248	eor	r3,r3,r9
2249	veor	q9,q9,q10
2250	add	r6,r6,r2
2251	vsli.32	q11,q8,#14
2252	eor	r2,r4,r5
2253	eor	r0,r11,r11,ror#5
2254	vshr.u32	d24,d5,#17
2255	add	r7,r7,r3
2256	and	r2,r2,r11
2257	veor	q9,q9,q11
2258	eor	r3,r0,r11,ror#19
2259	eor	r0,r7,r7,ror#11
2260	vsli.32	d24,d5,#15
2261	eor	r2,r2,r5
2262	add	r6,r6,r3,ror#6
2263	vshr.u32	d25,d5,#10
2264	eor	r3,r7,r8
2265	eor	r0,r0,r7,ror#20
2266	vadd.i32	q3,q3,q9
2267	add	r6,r6,r2
2268	ldr	r2,[sp,#56]
2269	veor	d25,d25,d24
2270	and	r12,r12,r3
2271	add	r10,r10,r6
2272	vshr.u32	d24,d5,#19
2273	add	r6,r6,r0,ror#2
2274	eor	r12,r12,r8
2275	vsli.32	d24,d5,#13
2276	add	r5,r5,r2
2277	eor	r2,r11,r4
2278	veor	d25,d25,d24
2279	eor	r0,r10,r10,ror#5
2280	add	r6,r6,r12
2281	vadd.i32	d6,d6,d25
2282	and	r2,r2,r10
2283	eor	r12,r0,r10,ror#19
2284	vshr.u32	d24,d6,#17
2285	eor	r0,r6,r6,ror#11
2286	eor	r2,r2,r4
2287	vsli.32	d24,d6,#15
2288	add	r5,r5,r12,ror#6
2289	eor	r12,r6,r7
2290	vshr.u32	d25,d6,#10
2291	eor	r0,r0,r6,ror#20
2292	add	r5,r5,r2
2293	veor	d25,d25,d24
2294	ldr	r2,[sp,#60]
2295	and	r3,r3,r12
2296	vshr.u32	d24,d6,#19
2297	add	r9,r9,r5
2298	add	r5,r5,r0,ror#2
2299	eor	r3,r3,r7
2300	vld1.32	{q8},[r14,:128]!
2301	add	r4,r4,r2
2302	vsli.32	d24,d6,#13
2303	eor	r2,r10,r11
2304	eor	r0,r9,r9,ror#5
2305	veor	d25,d25,d24
2306	add	r5,r5,r3
2307	and	r2,r2,r9
2308	vadd.i32	d7,d7,d25
2309	eor	r3,r0,r9,ror#19
2310	eor	r0,r5,r5,ror#11
2311	vadd.i32	q8,q8,q3
2312	eor	r2,r2,r11
2313	add	r4,r4,r3,ror#6
2314	eor	r3,r5,r6
2315	eor	r0,r0,r5,ror#20
2316	add	r4,r4,r2
2317	ldr	r2,[r14]
2318	and	r12,r12,r3
2319	add	r8,r8,r4
2320	vst1.32	{q8},[r1,:128]!
2321	add	r4,r4,r0,ror#2
2322	eor	r12,r12,r6
2323	teq	r2,#0				@ check for K256 terminator
2324	ldr	r2,[sp,#0]
2325	sub	r1,r1,#64
2326	bne	.L_00_48
2327
2328	ldr	r1,[sp,#68]
2329	ldr	r0,[sp,#72]
2330	sub	r14,r14,#256	@ rewind r14
2331	teq	r1,r0
2332	it	eq
2333	subeq	r1,r1,#64		@ avoid SEGV
2334	vld1.8	{q0},[r1]!		@ load next input block
2335	vld1.8	{q1},[r1]!
2336	vld1.8	{q2},[r1]!
2337	vld1.8	{q3},[r1]!
2338	it	ne
2339	strne	r1,[sp,#68]
2340	mov	r1,sp
2341	add	r11,r11,r2
2342	eor	r2,r9,r10
2343	eor	r0,r8,r8,ror#5
2344	add	r4,r4,r12
2345	vld1.32	{q8},[r14,:128]!
2346	and	r2,r2,r8
2347	eor	r12,r0,r8,ror#19
2348	eor	r0,r4,r4,ror#11
2349	eor	r2,r2,r10
2350	vrev32.8	q0,q0
2351	add	r11,r11,r12,ror#6
2352	eor	r12,r4,r5
2353	eor	r0,r0,r4,ror#20
2354	add	r11,r11,r2
2355	vadd.i32	q8,q8,q0
2356	ldr	r2,[sp,#4]
2357	and	r3,r3,r12
2358	add	r7,r7,r11
2359	add	r11,r11,r0,ror#2
2360	eor	r3,r3,r5
2361	add	r10,r10,r2
2362	eor	r2,r8,r9
2363	eor	r0,r7,r7,ror#5
2364	add	r11,r11,r3
2365	and	r2,r2,r7
2366	eor	r3,r0,r7,ror#19
2367	eor	r0,r11,r11,ror#11
2368	eor	r2,r2,r9
2369	add	r10,r10,r3,ror#6
2370	eor	r3,r11,r4
2371	eor	r0,r0,r11,ror#20
2372	add	r10,r10,r2
2373	ldr	r2,[sp,#8]
2374	and	r12,r12,r3
2375	add	r6,r6,r10
2376	add	r10,r10,r0,ror#2
2377	eor	r12,r12,r4
2378	add	r9,r9,r2
2379	eor	r2,r7,r8
2380	eor	r0,r6,r6,ror#5
2381	add	r10,r10,r12
2382	and	r2,r2,r6
2383	eor	r12,r0,r6,ror#19
2384	eor	r0,r10,r10,ror#11
2385	eor	r2,r2,r8
2386	add	r9,r9,r12,ror#6
2387	eor	r12,r10,r11
2388	eor	r0,r0,r10,ror#20
2389	add	r9,r9,r2
2390	ldr	r2,[sp,#12]
2391	and	r3,r3,r12
2392	add	r5,r5,r9
2393	add	r9,r9,r0,ror#2
2394	eor	r3,r3,r11
2395	add	r8,r8,r2
2396	eor	r2,r6,r7
2397	eor	r0,r5,r5,ror#5
2398	add	r9,r9,r3
2399	and	r2,r2,r5
2400	eor	r3,r0,r5,ror#19
2401	eor	r0,r9,r9,ror#11
2402	eor	r2,r2,r7
2403	add	r8,r8,r3,ror#6
2404	eor	r3,r9,r10
2405	eor	r0,r0,r9,ror#20
2406	add	r8,r8,r2
2407	ldr	r2,[sp,#16]
2408	and	r12,r12,r3
2409	add	r4,r4,r8
2410	add	r8,r8,r0,ror#2
2411	eor	r12,r12,r10
2412	vst1.32	{q8},[r1,:128]!
2413	add	r7,r7,r2
2414	eor	r2,r5,r6
2415	eor	r0,r4,r4,ror#5
2416	add	r8,r8,r12
2417	vld1.32	{q8},[r14,:128]!
2418	and	r2,r2,r4
2419	eor	r12,r0,r4,ror#19
2420	eor	r0,r8,r8,ror#11
2421	eor	r2,r2,r6
2422	vrev32.8	q1,q1
2423	add	r7,r7,r12,ror#6
2424	eor	r12,r8,r9
2425	eor	r0,r0,r8,ror#20
2426	add	r7,r7,r2
2427	vadd.i32	q8,q8,q1
2428	ldr	r2,[sp,#20]
2429	and	r3,r3,r12
2430	add	r11,r11,r7
2431	add	r7,r7,r0,ror#2
2432	eor	r3,r3,r9
2433	add	r6,r6,r2
2434	eor	r2,r4,r5
2435	eor	r0,r11,r11,ror#5
2436	add	r7,r7,r3
2437	and	r2,r2,r11
2438	eor	r3,r0,r11,ror#19
2439	eor	r0,r7,r7,ror#11
2440	eor	r2,r2,r5
2441	add	r6,r6,r3,ror#6
2442	eor	r3,r7,r8
2443	eor	r0,r0,r7,ror#20
2444	add	r6,r6,r2
2445	ldr	r2,[sp,#24]
2446	and	r12,r12,r3
2447	add	r10,r10,r6
2448	add	r6,r6,r0,ror#2
2449	eor	r12,r12,r8
2450	add	r5,r5,r2
2451	eor	r2,r11,r4
2452	eor	r0,r10,r10,ror#5
2453	add	r6,r6,r12
2454	and	r2,r2,r10
2455	eor	r12,r0,r10,ror#19
2456	eor	r0,r6,r6,ror#11
2457	eor	r2,r2,r4
2458	add	r5,r5,r12,ror#6
2459	eor	r12,r6,r7
2460	eor	r0,r0,r6,ror#20
2461	add	r5,r5,r2
2462	ldr	r2,[sp,#28]
2463	and	r3,r3,r12
2464	add	r9,r9,r5
2465	add	r5,r5,r0,ror#2
2466	eor	r3,r3,r7
2467	add	r4,r4,r2
2468	eor	r2,r10,r11
2469	eor	r0,r9,r9,ror#5
2470	add	r5,r5,r3
2471	and	r2,r2,r9
2472	eor	r3,r0,r9,ror#19
2473	eor	r0,r5,r5,ror#11
2474	eor	r2,r2,r11
2475	add	r4,r4,r3,ror#6
2476	eor	r3,r5,r6
2477	eor	r0,r0,r5,ror#20
2478	add	r4,r4,r2
2479	ldr	r2,[sp,#32]
2480	and	r12,r12,r3
2481	add	r8,r8,r4
2482	add	r4,r4,r0,ror#2
2483	eor	r12,r12,r6
2484	vst1.32	{q8},[r1,:128]!
2485	add	r11,r11,r2
2486	eor	r2,r9,r10
2487	eor	r0,r8,r8,ror#5
2488	add	r4,r4,r12
2489	vld1.32	{q8},[r14,:128]!
2490	and	r2,r2,r8
2491	eor	r12,r0,r8,ror#19
2492	eor	r0,r4,r4,ror#11
2493	eor	r2,r2,r10
2494	vrev32.8	q2,q2
2495	add	r11,r11,r12,ror#6
2496	eor	r12,r4,r5
2497	eor	r0,r0,r4,ror#20
2498	add	r11,r11,r2
2499	vadd.i32	q8,q8,q2
2500	ldr	r2,[sp,#36]
2501	and	r3,r3,r12
2502	add	r7,r7,r11
2503	add	r11,r11,r0,ror#2
2504	eor	r3,r3,r5
2505	add	r10,r10,r2
2506	eor	r2,r8,r9
2507	eor	r0,r7,r7,ror#5
2508	add	r11,r11,r3
2509	and	r2,r2,r7
2510	eor	r3,r0,r7,ror#19
2511	eor	r0,r11,r11,ror#11
2512	eor	r2,r2,r9
2513	add	r10,r10,r3,ror#6
2514	eor	r3,r11,r4
2515	eor	r0,r0,r11,ror#20
2516	add	r10,r10,r2
2517	ldr	r2,[sp,#40]
2518	and	r12,r12,r3
2519	add	r6,r6,r10
2520	add	r10,r10,r0,ror#2
2521	eor	r12,r12,r4
2522	add	r9,r9,r2
2523	eor	r2,r7,r8
2524	eor	r0,r6,r6,ror#5
2525	add	r10,r10,r12
2526	and	r2,r2,r6
2527	eor	r12,r0,r6,ror#19
2528	eor	r0,r10,r10,ror#11
2529	eor	r2,r2,r8
2530	add	r9,r9,r12,ror#6
2531	eor	r12,r10,r11
2532	eor	r0,r0,r10,ror#20
2533	add	r9,r9,r2
2534	ldr	r2,[sp,#44]
2535	and	r3,r3,r12
2536	add	r5,r5,r9
2537	add	r9,r9,r0,ror#2
2538	eor	r3,r3,r11
2539	add	r8,r8,r2
2540	eor	r2,r6,r7
2541	eor	r0,r5,r5,ror#5
2542	add	r9,r9,r3
2543	and	r2,r2,r5
2544	eor	r3,r0,r5,ror#19
2545	eor	r0,r9,r9,ror#11
2546	eor	r2,r2,r7
2547	add	r8,r8,r3,ror#6
2548	eor	r3,r9,r10
2549	eor	r0,r0,r9,ror#20
2550	add	r8,r8,r2
2551	ldr	r2,[sp,#48]
2552	and	r12,r12,r3
2553	add	r4,r4,r8
2554	add	r8,r8,r0,ror#2
2555	eor	r12,r12,r10
2556	vst1.32	{q8},[r1,:128]!
2557	add	r7,r7,r2
2558	eor	r2,r5,r6
2559	eor	r0,r4,r4,ror#5
2560	add	r8,r8,r12
2561	vld1.32	{q8},[r14,:128]!
2562	and	r2,r2,r4
2563	eor	r12,r0,r4,ror#19
2564	eor	r0,r8,r8,ror#11
2565	eor	r2,r2,r6
2566	vrev32.8	q3,q3
2567	add	r7,r7,r12,ror#6
2568	eor	r12,r8,r9
2569	eor	r0,r0,r8,ror#20
2570	add	r7,r7,r2
2571	vadd.i32	q8,q8,q3
2572	ldr	r2,[sp,#52]
2573	and	r3,r3,r12
2574	add	r11,r11,r7
2575	add	r7,r7,r0,ror#2
2576	eor	r3,r3,r9
2577	add	r6,r6,r2
2578	eor	r2,r4,r5
2579	eor	r0,r11,r11,ror#5
2580	add	r7,r7,r3
2581	and	r2,r2,r11
2582	eor	r3,r0,r11,ror#19
2583	eor	r0,r7,r7,ror#11
2584	eor	r2,r2,r5
2585	add	r6,r6,r3,ror#6
2586	eor	r3,r7,r8
2587	eor	r0,r0,r7,ror#20
2588	add	r6,r6,r2
2589	ldr	r2,[sp,#56]
2590	and	r12,r12,r3
2591	add	r10,r10,r6
2592	add	r6,r6,r0,ror#2
2593	eor	r12,r12,r8
2594	add	r5,r5,r2
2595	eor	r2,r11,r4
2596	eor	r0,r10,r10,ror#5
2597	add	r6,r6,r12
2598	and	r2,r2,r10
2599	eor	r12,r0,r10,ror#19
2600	eor	r0,r6,r6,ror#11
2601	eor	r2,r2,r4
2602	add	r5,r5,r12,ror#6
2603	eor	r12,r6,r7
2604	eor	r0,r0,r6,ror#20
2605	add	r5,r5,r2
2606	ldr	r2,[sp,#60]
2607	and	r3,r3,r12
2608	add	r9,r9,r5
2609	add	r5,r5,r0,ror#2
2610	eor	r3,r3,r7
2611	add	r4,r4,r2
2612	eor	r2,r10,r11
2613	eor	r0,r9,r9,ror#5
2614	add	r5,r5,r3
2615	and	r2,r2,r9
2616	eor	r3,r0,r9,ror#19
2617	eor	r0,r5,r5,ror#11
2618	eor	r2,r2,r11
2619	add	r4,r4,r3,ror#6
2620	eor	r3,r5,r6
2621	eor	r0,r0,r5,ror#20
2622	add	r4,r4,r2
2623	ldr	r2,[sp,#64]
2624	and	r12,r12,r3
2625	add	r8,r8,r4
2626	add	r4,r4,r0,ror#2
2627	eor	r12,r12,r6
2628	vst1.32	{q8},[r1,:128]!
2629	ldr	r0,[r2,#0]
2630	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2631	ldr	r12,[r2,#4]
2632	ldr	r3,[r2,#8]
2633	ldr	r1,[r2,#12]
2634	add	r4,r4,r0			@ accumulate
2635	ldr	r0,[r2,#16]
2636	add	r5,r5,r12
2637	ldr	r12,[r2,#20]
2638	add	r6,r6,r3
2639	ldr	r3,[r2,#24]
2640	add	r7,r7,r1
2641	ldr	r1,[r2,#28]
2642	add	r8,r8,r0
2643	str	r4,[r2],#4
2644	add	r9,r9,r12
2645	str	r5,[r2],#4
2646	add	r10,r10,r3
2647	str	r6,[r2],#4
2648	add	r11,r11,r1
2649	str	r7,[r2],#4
2650	stmia	r2,{r8,r9,r10,r11}
2651
2652	ittte	ne
2653	movne	r1,sp
2654	ldrne	r2,[sp,#0]
2655	eorne	r12,r12,r12
2656	ldreq	sp,[sp,#76]			@ restore original sp
2657	itt	ne
2658	eorne	r3,r5,r6
2659	bne	.L_00_48
2660
2661	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
2662.size	sha256_block_data_order_neon,.-sha256_block_data_order_neon
2663#endif
@ Everything down to the matching endif is assembled only when
@ __ARM_MAX_ARCH__ is at least 7 and __KERNEL__ is not defined.
2664#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2665
@ The four SHA-256 instructions used below are emitted as raw bytes via
@ the INST macro; the intended mnemonic is given in the comment on each
@ use. The non-Thumb variant emits the four bytes in the order given.
@ The Thumb-2 variant emits bytes c,d first, then a,b, and ORs 0xc into
@ byte d (so a final byte of 0xf3 is emitted as 0xff).
2666# if defined(__thumb2__)
2667#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
2668# else
2669#  define INST(a,b,c,d)	.byte	a,b,c,d
2670# endif
2671
@ ---------------------------------------------------------------------
@ sha256_block_data_order_armv8 -- SHA-256 block procedure built on the
@ sha256h / sha256h2 / sha256su0 / sha256su1 instructions.
@
@ Register use, as established by the code below:
@   r0 = address of eight 32-bit state words; read into q0,q1 on entry
@        and written back from q0,q1 on exit
@   r1 = input pointer, advanced by 64 bytes per loop iteration
@   r2 = number of 64-byte blocks on entry; turned into the address of
@        the end of the input (r1 + r2*64)
@   r3 = rewound by 256+32 bytes on entry, then used as the pointer from
@        which sixteen 4-word constant vectors are read per block.
@        NOTE(review): presumably whoever branches to .LARMv8 leaves r3
@        pointing 256+32 bytes past the start of the round-constant
@        table; that code is outside this section -- confirm there.
@ Written: r1, r2, r3, q0-q2, q8-q15 and the condition flags. No stack
@ access is made.
@ The end-of-input test is only consumed at the bottom of the loop, so
@ the loop body always runs at least once; a block count of zero is not
@ handled here.
@ ---------------------------------------------------------------------
2672.type	sha256_block_data_order_armv8,%function
2673.align	5
2674sha256_block_data_order_armv8:
2675.LARMv8:
2676	vld1.32	{q0,q1},[r0]	@ q0,q1 = current state words
2677	sub	r3,r3,#256+32	@ step r3 back to the first constant vector (see header note)
2678	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
2679	b	.Loop_v8	@ jump over the alignment padding that follows
2680
2681.align	4
@ One iteration consumes one 64-byte block: sixteen sha256h/sha256h2
@ pairs, each fed with one constant vector added to one 4-word message
@ vector. The first twelve pairs are interleaved with sha256su0/sha256su1
@ updates of q8-q11; the last four are not.
2682.Loop_v8:
2683	vld1.8	{q8,q9},[r1]!	@ q8-q11 = next 64 input bytes, r1 += 64
2684	vld1.8	{q10,q11},[r1]!
2685	vld1.32	{q12},[r3]!	@ constant vector 1 of 16
2686	vrev32.8	q8,q8	@ reverse the bytes inside every 32-bit word
2687	vrev32.8	q9,q9
2688	vrev32.8	q10,q10
2689	vrev32.8	q11,q11
2690	vmov	q14,q0	@ offload
2691	vmov	q15,q1	@ q14,q15 keep the incoming state for the final addition
2692	teq	r1,r2	@ Z set when this was the last block; consumed by bne at loop end
2693	vld1.32	{q13},[r3]!	@ constant vector 2; q12/q13 alternate from here on
2694	vadd.i32	q12,q12,q8	@ q12 = constants + message words
2695	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2696	vmov	q2,q0	@ copy q0: sha256h overwrites q0, sha256h2 reads the copy
2697	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2698	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2699	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2700	vld1.32	{q12},[r3]!
2701	vadd.i32	q13,q13,q9
2702	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2703	vmov	q2,q0
2704	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2705	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2706	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2707	vld1.32	{q13},[r3]!
2708	vadd.i32	q12,q12,q10
2709	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2710	vmov	q2,q0
2711	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2712	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2713	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2714	vld1.32	{q12},[r3]!
2715	vadd.i32	q13,q13,q11
2716	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2717	vmov	q2,q0
2718	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2719	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2720	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Pairs 5-8: same pattern, q8-q11 now hold the updated vectors.
2721	vld1.32	{q13},[r3]!
2722	vadd.i32	q12,q12,q8
2723	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2724	vmov	q2,q0
2725	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2726	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2727	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2728	vld1.32	{q12},[r3]!
2729	vadd.i32	q13,q13,q9
2730	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2731	vmov	q2,q0
2732	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2733	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2734	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2735	vld1.32	{q13},[r3]!
2736	vadd.i32	q12,q12,q10
2737	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2738	vmov	q2,q0
2739	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2740	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2741	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2742	vld1.32	{q12},[r3]!
2743	vadd.i32	q13,q13,q11
2744	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2745	vmov	q2,q0
2746	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2747	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2748	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Pairs 9-12: last group that also runs sha256su0/sha256su1.
2749	vld1.32	{q13},[r3]!
2750	vadd.i32	q12,q12,q8
2751	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
2752	vmov	q2,q0
2753	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2754	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2755	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
2756	vld1.32	{q12},[r3]!
2757	vadd.i32	q13,q13,q9
2758	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
2759	vmov	q2,q0
2760	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2761	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2762	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
2763	vld1.32	{q13},[r3]!
2764	vadd.i32	q12,q12,q10
2765	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
2766	vmov	q2,q0
2767	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2768	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2769	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
2770	vld1.32	{q12},[r3]!
2771	vadd.i32	q13,q13,q11
2772	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
2773	vmov	q2,q0
2774	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2775	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2776	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
@ Pairs 13-16: q8-q11 are consumed as they stand, no further
@ sha256su0/sha256su1 updates are issued.
2777	vld1.32	{q13},[r3]!
2778	vadd.i32	q12,q12,q8
2779	vmov	q2,q0
2780	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2781	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2782
2783	vld1.32	{q12},[r3]!	@ fifteenth and last post-incrementing constant load
2784	vadd.i32	q13,q13,q9
2785	vmov	q2,q0
2786	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2787	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2788
2789	vld1.32	{q13},[r3]	@ sixteenth constant vector, no write-back
2790	vadd.i32	q12,q12,q10
2791	sub	r3,r3,#256-16	@ rewind: undo the fifteen 16-byte post-increments
2792	vmov	q2,q0
2793	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
2794	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
2795
2796	vadd.i32	q13,q13,q11
2797	vmov	q2,q0
2798	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
2799	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
2800
2801	vadd.i32	q0,q0,q14	@ add the state saved at the top of the iteration
2802	vadd.i32	q1,q1,q15
2803	it	ne
2804	bne	.Loop_v8	@ flags still come from the teq r1,r2 above: loop until r1 reaches r2
2805
2806	vst1.32	{q0,q1},[r0]	@ write the updated state words back through r0
2807
2808	bx	lr		@ return to the caller
2809.size	sha256_block_data_order_armv8,.-sha256_block_data_order_armv8
2810#endif
@ NUL-terminated ASCII identification string; the bytes decode to
@ SHA256 block transform for ARMv4/NEON/ARMv8, CRYPTOGAMS by <appro@openssl.org>
2811.byte	83,72,65,50,53,54,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,47,65,82,77,118,56,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
@ Realign after the odd-length string. NOTE(review): the directive is
@ issued twice; the second one has nothing left to pad.
2812.align	2
2813.align	2
@ When built for architecture level 7 or above outside the kernel,
@ declare OPENSSL_armcap_P as a 4-byte, 4-byte-aligned common symbol
@ with hidden visibility.
@ NOTE(review): presumably the CPU capability word tested by the
@ dispatch code earlier in this file -- that code is not visible in
@ this section, confirm there.
2814#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
2815.comm	OPENSSL_armcap_P,4,4
2816.hidden	OPENSSL_armcap_P
2817#endif
2818#endif
2819