1default	rel
2%define XMMWORD
3%define YMMWORD
4%define ZMMWORD
5section	.text code align=64
6
7
8EXTERN	OPENSSL_ia32cap_P
9
10global	bn_mul_mont
11
;-----------------------------------------------------------------------
; bn_mul_mont -- Montgomery multiplication, exported Win64 entry point.
;
; ABI:  Microsoft x64.  The six arguments arrive in rcx, rdx, r8, r9,
;       [40+rsp] and [48+rsp]; they are moved into the registers the
;       body works with:
;         rdi = rp   result vector, num words (written)
;         rsi = ap   first operand, num words
;         rdx = bp   second operand, num words (kept in r12)
;         rcx = np   modulus vector, num words
;         r8  = n0   pointer to one word; the word is loaded into r8
;         r9d = num  word count (zero-extended to r9)
; Out:  rax = 1
; Preserves rbx, rbp, r12-r15 (pushed) and rdi/rsi (spilled to the
; caller-provided home slots at [8+rsp]/[16+rsp] on entry).
;
; Dispatch:
;   num % 4 != 0 or num < 8      -> generic word-by-word loop below
;   bp != ap                     -> $L$mul4x_enter
;   bp == ap and num % 8 == 0    -> $L$sqr8x_enter
;   otherwise                    -> $L$mul4x_enter
;
; Frame (rsp rounded down to a 1024-byte boundary):
;   [rsp + 8*i], i = 0..num-1    tp[i], running result
;   [rsp + 8*num]                tp[num], top carry word
;   [rsp + 8*num + 8]            stack pointer as it was after the pushes
; mul_handler reads that last slot (indexed by the saved R9) to unwind,
; and relies on rax holding the entry rsp until $L$mul_body.
;
; The frame size depends on num, so the allocation is done with a page
; walk: every 4096-byte page between the old and the new rsp is touched
; in descending order before the frame is used.
;-----------------------------------------------------------------------
ALIGN	16
bn_mul_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp			; rax = entry rsp, kept until the body
$L$SEH_begin_bn_mul_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	test	r9d,3			; num not a multiple of 4?
	jnz	NEAR $L$mul_enter
	cmp	r9d,8			; num < 8?
	jb	NEAR $L$mul_enter
	cmp	rdx,rsi			; bp != ap?
	jne	NEAR $L$mul4x_enter
	test	r9d,7			; squaring with num a multiple of 8?
	jz	NEAR $L$sqr8x_enter
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	mov	r9d,r9d			; zero-extend num
	lea	r10,[2+r9]
	mov	r11,rsp
	neg	r10
	lea	r10,[r10*8+rsp]		; room for num+2 words below rsp
	and	r10,-1024		; r10 = final frame bottom

	; Page walk: start at the highest address that is congruent to r10
	; modulo 4096 and not above the current rsp, then step down one
	; page at a time, reading each page, until rsp == r10.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r10+r11]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
	jmp	NEAR $L$mul_page_walk_done

ALIGN	16
$L$mul_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$mul_page_walk
$L$mul_page_walk_done:

	lea	r11,[((-48))+rax]	; rsp as it was after the six pushes
	mov	QWORD[8+r9*8+rsp],r11	; tp[num+1] = saved rsp
$L$mul_body:
	mov	r12,rdx			; r12 = bp (rdx is clobbered by mul)
	mov	r8,QWORD[r8]		; r8 = n0 value
	mov	rbx,QWORD[r12]		; rbx = bp[0]
	mov	rax,QWORD[rsi]		; rax = ap[0]

	xor	r14,r14			; r14 = i, outer index
	xor	r15,r15			; r15 = j, inner index

	mov	rbp,r8
	mul	rbx			; ap[0]*bp[0]
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10			; rbp = m = low word * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m
	add	r10,rax			; low word is discarded
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]		; j = 1
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11			; np[j]*m + ap[j]*bp[0]
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-1]
	mov	r13,rdx

$L$1st_enter:
	mul	rbx			; ap[j]*bp[0]
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]		; j++
	mov	r10,rdx

	mul	rbp			; np[j]*m
	cmp	r15,r9
	jne	NEAR $L$1st

	add	r13,rax
	mov	rax,QWORD[rsi]		; ap[0] for the next outer pass
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
	mov	QWORD[r9*8+rsp],rdx		; tp[num] = top carry

	lea	r14,[1+r14]		; i = 1
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
	mov	rbx,QWORD[r14*8+r12]	; rbx = bp[i]
	xor	r15,r15			; j = 0
	mov	rbp,r8
	mov	r10,QWORD[rsp]		; tp[0]
	mul	rbx			; ap[0]*bp[i]
	add	r10,rax			; + tp[0]
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10			; rbp = m = low word * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m
	add	r10,rax			; low word is discarded
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]	; tp[1]
	mov	r13,rdx

	lea	r15,[1+r15]		; j = 1
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10			; np[j]*m + ap[j]*bp[i] + tp[j]
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[j-1]
	mov	r13,rdx

$L$inner_enter:
	mul	rbx			; ap[j]*bp[i]
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11			; ap[j]*bp[i] + tp[j]
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]		; j++

	mul	rbp			; np[j]*m
	cmp	r15,r9
	jne	NEAR $L$inner

	add	r13,rax
	mov	rax,QWORD[rsi]		; ap[0] for the next outer pass
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]	; tp[num], previous top carry
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13	; tp[num-2]
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10			; fold in the previous top carry
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13	; tp[num-1]
	mov	QWORD[r9*8+rsp],rdx		; tp[num] = top carry

	lea	r14,[1+r14]		; i++
	cmp	r14,r9
	jb	NEAR $L$outer

	; Final reduction: rp[] = tp[] - np[], then keep either the
	; difference or tp[] using a mask instead of a branch.
	xor	r14,r14			; i = 0; also clears CF for the first sbb
	mov	rax,QWORD[rsp]		; tp[0]
	lea	rsi,[rsp]		; rsi = tp
	mov	r15,r9			; counter = num
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax	; rp[i] = tp[i] - np[i] - borrow
	mov	rax,QWORD[8+r14*8+rsi]	; tp[i+1]
	lea	r14,[1+r14]		; lea/dec leave CF intact for the next sbb
	dec	r15
	jnz	NEAR $L$sub

	sbb	rax,0			; rax = tp[num] - borrow, used as the select mask
	xor	r14,r14
	mov	r15,r9
ALIGN	16
$L$copy:
	mov	rsi,QWORD[r14*8+rsp]	; tp[i]
	mov	rcx,QWORD[r14*8+rdi]	; rp[i] (the difference)
	xor	rsi,rcx
	and	rsi,rax
	xor	rsi,rcx			; rax all-ones -> tp[i], rax zero -> rp[i]
	mov	QWORD[r14*8+rsp],r14	; overwrite the temporary word
	mov	QWORD[r14*8+rdi],rsi
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	mov	rsi,QWORD[8+r9*8+rsp]	; saved rsp (points at the pushed r15)
	mov	rax,1
	mov	r15,QWORD[rsi]
	mov	r14,QWORD[8+rsi]
	mov	r13,QWORD[16+rsi]
	mov	r12,QWORD[24+rsi]
	mov	rbp,QWORD[32+rsi]
	mov	rbx,QWORD[40+rsi]
	lea	rsp,[48+rsi]
$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul_mont:
232
;-----------------------------------------------------------------------
; bn_mul4x_mont -- Montgomery multiplication, inner loops unrolled 4x.
;
; Not declared global in this file; bn_mul_mont jumps to $L$mul4x_enter
; after its own argument shuffle, with rax still holding the entry rsp.
;
; Register roles from $L$mul4x_enter on:
;   rdi = rp (saved in the frame, then reused as an accumulator)
;   rsi = ap, rdx = bp (kept in r12), rcx = np
;   r8  = pointer to n0 (the word is loaded into r8)
;   r9d = num, word count; the 4x loop structure assumes a multiple of 4
;         that is at least 8, which is what bn_mul_mont's dispatch passes
; Out:  rax = 1
;
; Frame (rsp rounded down to a 1024-byte boundary):
;   [rsp + 8*i], i = 0..num      tp[i]; tp[num] is the top carry word
;   [rsp + 8*num + 8]            stack pointer as it was after the pushes
;   [rsp + 8*num + 16]           saved rp
; mul_handler reads the saved-rsp slot (indexed by the saved R9) to
; unwind, and relies on rax holding the entry rsp until $L$mul4x_body.
;
; The frame size depends on num, so the allocation is done with a page
; walk: every 4096-byte page between the old and the new rsp is touched
; in descending order before the frame is used.
;-----------------------------------------------------------------------
ALIGN	16
bn_mul4x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp			; rax = entry rsp, kept until the body
$L$SEH_begin_bn_mul4x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


$L$mul4x_enter:
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	mov	r9d,r9d			; zero-extend num
	lea	r10,[4+r9]
	mov	r11,rsp
	neg	r10
	lea	r10,[r10*8+rsp]		; room for num+4 words below rsp
	and	r10,-1024		; r10 = final frame bottom

	; Page walk: start at the highest address that is congruent to r10
	; modulo 4096 and not above the current rsp, then step down one
	; page at a time, reading each page, until rsp == r10.
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r10+r11]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
	jmp	NEAR $L$mul4x_page_walk_done

ALIGN	16
$L$mul4x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$mul4x_page_walk
$L$mul4x_page_walk_done:

	lea	r11,[((-48))+rax]	; rsp as it was after the six pushes
	mov	QWORD[8+r9*8+rsp],r11	; tp[num+1] = saved rsp
$L$mul4x_body:
	mov	QWORD[16+r9*8+rsp],rdi	; tp[num+2] = rp
	mov	r12,rdx			; r12 = bp (rdx is clobbered by mul)
	mov	r8,QWORD[r8]		; r8 = n0 value
	mov	rbx,QWORD[r12]		; rbx = bp[0]
	mov	rax,QWORD[rsi]		; rax = ap[0]

	xor	r14,r14			; r14 = i, outer index
	xor	r15,r15			; r15 = j, inner index

	mov	rbp,r8
	mul	rbx			; ap[0]*bp[0]
	mov	r10,rax
	mov	rax,QWORD[rcx]

	imul	rbp,r10			; rbp = m = low word * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m
	add	r10,rax			; low word is discarded
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx			; ap[1]*bp[0]
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp			; np[1]*m
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]		; j = 4
	adc	rdx,0
	mov	QWORD[rsp],rdi		; tp[0]
	mov	r13,rdx
	jmp	NEAR $L$1st4x
ALIGN	16
$L$1st4x:
	; First pass (i = 0), four words per iteration; the r10/r11 and
	; r13/rdi accumulator pairs alternate roles from word to word.
	mul	rbx			; ap[j-2]*bp[0]
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp			; np[j-2]*m
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13	; tp[j-3]
	mov	rdi,rdx

	mul	rbx			; ap[j-1]*bp[0]
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp			; np[j-1]*m
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi	; tp[j-2]
	mov	r13,rdx

	mul	rbx			; ap[j]*bp[0]
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp			; np[j]*m
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[j-1]
	mov	rdi,rdx

	mul	rbx			; ap[j+1]*bp[0]
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	lea	r15,[4+r15]		; j += 4
	mov	r10,rdx

	mul	rbp			; np[j-3]*m (j already advanced)
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi	; tp[j-4]
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$1st4x

	; Tail of the first pass: the last two words, then the top carry.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]		; ap[0] for the next outer pass
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
	mov	QWORD[r15*8+rsp],rdi		; tp[num] = top carry

	lea	r14,[1+r14]		; i = 1
ALIGN	4
$L$outer4x:
	mov	rbx,QWORD[r14*8+r12]	; rbx = bp[i]
	xor	r15,r15			; j = 0
	mov	r10,QWORD[rsp]		; tp[0]
	mov	rbp,r8
	mul	rbx			; ap[0]*bp[i]
	add	r10,rax			; + tp[0]
	mov	rax,QWORD[rcx]
	adc	rdx,0

	imul	rbp,r10			; rbp = m = low word * n0
	mov	r11,rdx

	mul	rbp			; np[0]*m
	add	r10,rax			; low word is discarded
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx			; ap[1]*bp[i]
	add	r11,rax
	mov	rax,QWORD[8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+rsp]	; + tp[1]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp			; np[1]*m
	add	rdi,rax
	mov	rax,QWORD[16+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[4+r15]		; j = 4
	adc	rdx,0
	mov	QWORD[rsp],rdi		; tp[0]
	mov	r13,rdx
	jmp	NEAR $L$inner4x
ALIGN	16
$L$inner4x:
	; Same shape as $L$1st4x, with the previous tp[] word added to
	; each ap[]*bp[i] product.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[8+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r15*8+rsp]
	adc	rdx,0
	lea	r15,[4+r15]		; j += 4
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[((-16))+r15*8+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-32))+r15*8+rsp],rdi
	mov	r13,rdx
	cmp	r15,r9
	jb	NEAR $L$inner4x

	; Tail of the pass: the last two words, then the top carry.
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-16))+r15*8+rcx]
	adc	rdx,0
	add	r10,QWORD[((-16))+r15*8+rsp]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r15*8+rsp],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-8))+r15*8+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r15*8+rsp]
	adc	rdx,0
	lea	r14,[1+r14]		; i++
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[rsi]		; ap[0] for the next outer pass
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],rdi
	mov	r13,rdx

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r9*8+rsp]	; fold in the previous top carry
	adc	rdi,0
	mov	QWORD[((-8))+r15*8+rsp],r13	; tp[num-1]
	mov	QWORD[r15*8+rsp],rdi		; tp[num] = top carry

	cmp	r14,r9
	jb	NEAR $L$outer4x

	; Final reduction: rp[] = tp[] - np[], four words per iteration,
	; then a masked select between tp[] and the difference.
	mov	rdi,QWORD[16+r9*8+rsp]	; reload rp
	mov	rax,QWORD[rsp]		; tp[0]
	mov	rdx,QWORD[8+rsp]	; tp[1]
	shr	r9,2			; r9 = num/4
	lea	rsi,[rsp]		; rsi = tp
	xor	r14,r14			; i = 0

	sub	rax,QWORD[rcx]		; starts the borrow chain
	mov	rbx,QWORD[16+rsi]
	mov	rbp,QWORD[24+rsi]
	sbb	rdx,QWORD[8+rcx]
	lea	r15,[((-1))+r9]		; num/4 - 1 full iterations
	jmp	NEAR $L$sub4x
ALIGN	16
$L$sub4x:
	; Only mov/lea/dec sit between the sbb instructions, so CF carries
	; the borrow from word to word and across iterations.
	mov	QWORD[r14*8+rdi],rax
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	rax,QWORD[32+r14*8+rsi]
	mov	rdx,QWORD[40+r14*8+rsi]
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx
	mov	QWORD[24+r14*8+rdi],rbp
	sbb	rax,QWORD[32+r14*8+rcx]
	mov	rbx,QWORD[48+r14*8+rsi]
	mov	rbp,QWORD[56+r14*8+rsi]
	sbb	rdx,QWORD[40+r14*8+rcx]
	lea	r14,[4+r14]
	dec	r15
	jnz	NEAR $L$sub4x

	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[32+r14*8+rsi]	; tp[num], the top carry word
	sbb	rbx,QWORD[16+r14*8+rcx]
	mov	QWORD[8+r14*8+rdi],rdx
	sbb	rbp,QWORD[24+r14*8+rcx]
	mov	QWORD[16+r14*8+rdi],rbx

	sbb	rax,0			; rax = tp[num] - borrow, the select mask
DB 66h, 48h, 0fh, 6eh, 0c0h		; movq xmm0,rax
	punpcklqdq	xmm0,xmm0	; mask in both halves of xmm0
	mov	QWORD[24+r14*8+rdi],rbp
	xor	r14,r14			; byte offset = 0

	mov	r15,r9			; num/4 iterations of 32 bytes
	pxor	xmm5,xmm5		; zero, used to wipe tp[]
	jmp	NEAR $L$copy4x
ALIGN	16
$L$copy4x:
	movdqu	xmm2,XMMWORD[r14*1+rsp]
	movdqu	xmm4,XMMWORD[16+r14*1+rsp]
	movdqu	xmm1,XMMWORD[r14*1+rdi]
	movdqu	xmm3,XMMWORD[16+r14*1+rdi]
	pxor	xmm2,xmm1
	pxor	xmm4,xmm3
	pand	xmm2,xmm0
	pand	xmm4,xmm0
	pxor	xmm2,xmm1		; mask all-ones -> tp, mask zero -> rp
	pxor	xmm4,xmm3
	movdqu	XMMWORD[r14*1+rdi],xmm2
	movdqu	XMMWORD[16+r14*1+rdi],xmm4
	movdqa	XMMWORD[r14*1+rsp],xmm5	; wipe tp[] (rsp is 1024-byte aligned)
	movdqa	XMMWORD[16+r14*1+rsp],xmm5

	lea	r14,[32+r14]
	dec	r15
	jnz	NEAR $L$copy4x

	shl	r9,2			; back to num
	mov	rsi,QWORD[8+r9*8+rsp]	; saved rsp (points at the pushed r15)
	mov	rax,1
	mov	r15,QWORD[rsi]
	mov	r14,QWORD[8+rsi]
	mov	r13,QWORD[16+rsi]
	mov	r12,QWORD[24+rsi]
	mov	rbp,QWORD[32+rsi]
	mov	rbx,QWORD[40+rsi]
	lea	rsp,[48+rsi]
$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul4x_mont:
EXTERN	bn_sqr8x_internal

;-----------------------------------------------------------------------
; bn_sqr8x_mont -- Montgomery squaring front end around the external
; bn_sqr8x_internal routine.
;
; Not declared global in this file; bn_mul_mont jumps to $L$sqr8x_enter
; when bp == ap and num is a multiple of 8.
;
; Register roles from $L$sqr8x_enter on:
;   rdi = rp, rsi = ap, rcx = np
;   r8  = pointer to n0 (the word is loaded into r8)
;   r9d = num, word count
; Out:  rax = 1
;
; Frame (rsp aligned to 64 bytes):
;   [rsp+32]            n0 value
;   [rsp+40]            entry rsp (read back by sqr_handler and the epilogue)
;   [rsp+64+16*num ..]  copy of np[], one word in the low half of each
;                       16-byte slot (movq clears the upper half)
; The frame is 64 + 32*num bytes, shifted down by up to 4095 further
; bytes depending on (frame address - ap) mod 4096.
; NOTE(review): the shift looks intended to keep the frame from
; aliasing ap modulo the page size -- confirm against the generator.
;
; Because the drop can exceed one page and the first store is at the
; bottom of the frame, the allocation is done with a page walk: every
; 4096-byte page between the old and the new rsp is touched in
; descending order before the frame is used.  Only r10/r11 are used for
; that; rax keeps the entry rsp, which sqr_handler relies on until
; $L$sqr8x_body.
;
; The calling convention of bn_sqr8x_internal is not visible here; the
; code below sets xmm1 = rp, xmm2 = address of the np copy, xmm3 = -8*num,
; xmm0 = 0 and eax = OPENSSL_ia32cap_P[2] before the call.
;-----------------------------------------------------------------------
ALIGN	32
bn_sqr8x_mont:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_sqr8x_mont:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


$L$sqr8x_enter:
	mov	rax,rsp			; rax = entry rsp, kept until the body
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15

	mov	r10d,r9d
	shl	r9d,3			; r9  = 8*num (bytes)
	shl	r10,3+2			; r10 = 32*num
	neg	r9			; r9  = -8*num






	lea	r11,[((-64))+r9*4+rsp]	; tentative frame bottom
	mov	r8,QWORD[r8]		; r8 = n0 value
	sub	r11,rsi
	and	r11,4095		; (frame - ap) mod 4096
	cmp	r10,r11
	jb	NEAR $L$sqr8x_sp_alt
	lea	r10,[((-64))+r9*4+rsp]	; frame bottom ...
	sub	r10,r11			; ... shifted down by the page offset
	jmp	NEAR $L$sqr8x_sp_done

ALIGN	32
$L$sqr8x_sp_alt:
	lea	r10,[((4096-64))+r9*4]	; 4096 - frame size
	sub	r11,r10
	mov	r10,0			; mov, not xor: CF from the sub feeds cmovc
	cmovc	r11,r10			; extra shift = max(0, r11 - r10)
	lea	r10,[((-64))+r9*4+rsp]
	sub	r10,r11
$L$sqr8x_sp_done:
	and	r10,-64			; r10 = final frame bottom

	; Page walk: start at the highest address that is congruent to r10
	; modulo 4096 and not above the current rsp, then step down one
	; page at a time, reading each page, until rsp == r10.
	mov	r11,rsp
	sub	r11,r10
	and	r11,-4096
	lea	rsp,[r10+r11]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$sqr8x_page_walk
	jmp	NEAR $L$sqr8x_page_walk_done

ALIGN	16
$L$sqr8x_page_walk:
	lea	rsp,[((-4096))+rsp]
	mov	r11,QWORD[rsp]		; probe (value discarded)
	cmp	rsp,r10
	ja	NEAR $L$sqr8x_page_walk
$L$sqr8x_page_walk_done:

	mov	r10,r9			; r10 = -8*num
	neg	r9			; r9  =  8*num

	lea	r11,[64+r9*2+rsp]	; r11 = destination of the np copy
	mov	QWORD[32+rsp],r8	; save n0 value
	mov	QWORD[40+rsp],rax	; save entry rsp
$L$sqr8x_body:

	mov	rbp,r9
DB	102,73,15,110,211		; movq xmm2,r11
	shr	rbp,3+2			; rbp = num/4 copy iterations
	mov	eax,DWORD[((OPENSSL_ia32cap_P+8))]
	jmp	NEAR $L$sqr8x_copy_n

ALIGN	32
$L$sqr8x_copy_n:
	; Four modulus words per iteration, each into its own 16-byte slot.
	movq	xmm0,QWORD[rcx]
	movq	xmm1,QWORD[8+rcx]
	movq	xmm3,QWORD[16+rcx]
	movq	xmm4,QWORD[24+rcx]
	lea	rcx,[32+rcx]
	movdqa	XMMWORD[r11],xmm0
	movdqa	XMMWORD[16+r11],xmm1
	movdqa	XMMWORD[32+r11],xmm3
	movdqa	XMMWORD[48+r11],xmm4
	lea	r11,[64+r11]
	dec	rbp
	jnz	NEAR $L$sqr8x_copy_n

	pxor	xmm0,xmm0
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,73,15,110,218		; movq xmm3,r10
	call	bn_sqr8x_internal

	; Wipe the frame from [rsp+48] and from the np copy onward,
	; 64 bytes at each pointer per iteration, num/4 iterations.
	pxor	xmm0,xmm0
	lea	rax,[48+rsp]
	lea	rdx,[64+r9*2+rsp]
	shr	r9,3+2			; r9 = num/4
	mov	rsi,QWORD[40+rsp]	; entry rsp, read before its slot is wiped
	jmp	NEAR $L$sqr8x_zero

ALIGN	32
$L$sqr8x_zero:
	movdqa	XMMWORD[rax],xmm0
	movdqa	XMMWORD[16+rax],xmm0
	movdqa	XMMWORD[32+rax],xmm0
	movdqa	XMMWORD[48+rax],xmm0
	lea	rax,[64+rax]
	movdqa	XMMWORD[rdx],xmm0
	movdqa	XMMWORD[16+rdx],xmm0
	movdqa	XMMWORD[32+rdx],xmm0
	movdqa	XMMWORD[48+rdx],xmm0
	lea	rdx,[64+rdx]
	dec	r9
	jnz	NEAR $L$sqr8x_zero

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$sqr8x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_sqr8x_mont:
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>"
DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
DB	54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83
DB	32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115
DB	115,108,46,111,114,103,62,0
ALIGN	16
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler -- Win64 language-specific exception handler registered
; for bn_mul_mont and bn_mul4x_mont in the .xdata records.
;
; In (handler calling convention):
;   r8 = CONTEXT record of the interrupted frame
;   r9 = DISPATCHER_CONTEXT
;        (rcx/rdx, the exception record and establisher frame, are unused)
; HandlerData (from the .xdata record) holds two image-relative
; addresses: the function's body label and its epilogue label.
;
; It works out what rsp was on entry to the interrupted function:
;   Rip <  body      -> context->Rax (the prologue keeps entry rsp in rax)
;   Rip >= epilogue  -> context->Rsp (the frame is already released)
;   otherwise        -> the stack pointer the function saved at
;                       [Rsp + 8*R9 + 8], plus the 48 bytes of pushes;
;                       rbx, rbp, r12-r15 are then copied from below it
;                       into the context record
; and leaves it in rax for $L$common_seh_tail, which finishes the unwind
; and returns.
;
; The numeric offsets are the x64 CONTEXT / DISPATCHER_CONTEXT field
; offsets named in the comments.
;-----------------------------------------------------------------------
ALIGN	16
mul_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; home space + stack args for the call in the tail

	mov	rbx,QWORD[r8+248]	; context->Rip
	mov	rax,QWORD[r8+120]	; context->Rax

	mov	r11,QWORD[r9+56]	; disp->HandlerData
	mov	rsi,QWORD[r9+8]		; disp->ImageBase

	mov	r10d,DWORD[r11]		; HandlerData[0]: body label RVA
	add	r10,rsi
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail	; still in the prologue

	mov	rax,QWORD[r8+152]	; context->Rsp

	mov	r10d,DWORD[r11+4]	; HandlerData[1]: epilogue label RVA
	add	r10,rsi
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail	; already in the epilogue

	mov	r10,QWORD[r8+192]	; context->R9, the word count
	mov	rax,QWORD[rax+r10*8+8]	; stack pointer saved in the frame
	lea	rax,[rax+48]		; step over the six pushed registers

	mov	rbx,QWORD[rax-8]
	mov	rbp,QWORD[rax-16]
	mov	r12,QWORD[rax-24]
	mov	r13,QWORD[rax-32]
	mov	r14,QWORD[rax-40]
	mov	r15,QWORD[rax-48]
	mov	QWORD[r8+144],rbx	; context->Rbx
	mov	QWORD[r8+160],rbp	; context->Rbp
	mov	QWORD[r8+216],r12	; context->R12
	mov	QWORD[r8+224],r13	; context->R13
	mov	QWORD[r8+232],r14	; context->R14
	mov	QWORD[r8+240],r15	; context->R15

	jmp	NEAR $L$common_seh_tail
826
827
828
;-----------------------------------------------------------------------
; sqr_handler -- Win64 language-specific exception handler registered
; for bn_sqr8x_mont in the .xdata records.
;
; In (handler calling convention):
;   r8 = CONTEXT record of the interrupted frame
;   r9 = DISPATCHER_CONTEXT
; HandlerData holds the image-relative body and epilogue labels.
;
; Same three cases as mul_handler, except that inside the body the
; entry rsp is read from the slot at [Rsp+40].
;
; $L$common_seh_tail (shared with mul_handler) expects rax = the
; interrupted function's entry rsp.  It restores Rsi/Rdi from the home
; slots at [rax+8]/[rax+16], stores rax as context->Rsp, copies the
; patched CONTEXT into disp->ContextRecord, calls RtlVirtualUnwind and
; returns 1 (ExceptionContinueSearch).
;-----------------------------------------------------------------------
ALIGN	16
sqr_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64			; home space + stack args for the call below

	mov	rbx,QWORD[r8+248]	; context->Rip
	mov	rax,QWORD[r8+120]	; context->Rax

	mov	r11,QWORD[r9+56]	; disp->HandlerData
	mov	rsi,QWORD[r9+8]		; disp->ImageBase

	mov	r10d,DWORD[r11]		; HandlerData[0]: body label RVA
	add	r10,rsi
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail	; still in the prologue

	mov	rax,QWORD[r8+152]	; context->Rsp

	mov	r10d,DWORD[r11+4]	; HandlerData[1]: epilogue label RVA
	add	r10,rsi
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail	; already in the epilogue

	mov	rax,QWORD[rax+40]	; entry rsp saved in the frame

	mov	rbx,QWORD[rax-8]
	mov	rbp,QWORD[rax-16]
	mov	r12,QWORD[rax-24]
	mov	r13,QWORD[rax-32]
	mov	r14,QWORD[rax-40]
	mov	r15,QWORD[rax-48]
	mov	QWORD[r8+144],rbx	; context->Rbx
	mov	QWORD[r8+160],rbp	; context->Rbp
	mov	QWORD[r8+216],r12	; context->R12
	mov	QWORD[r8+224],r13	; context->R13
	mov	QWORD[r8+232],r14	; context->R14
	mov	QWORD[r8+240],r15	; context->R15

$L$common_seh_tail:
	mov	rdi,QWORD[rax+8]	; rdi/rsi spilled by the WIN64 prologue
	mov	rsi,QWORD[rax+16]
	mov	QWORD[r8+152],rax	; context->Rsp
	mov	QWORD[r8+168],rsi	; context->Rsi
	mov	QWORD[r8+176],rdi	; context->Rdi

	mov	rdi,QWORD[r9+40]	; disp->ContextRecord (copy destination)
	mov	rsi,r8			; patched context (copy source)
	mov	ecx,154			; quadword count for the copy
	cld
	rep movsq			; same bytes as the original DD 0xa548f3fc

	mov	rsi,r9			; rsi = disp
	xor	ecx,ecx			; arg1: handler type 0
	mov	rdx,QWORD[rsi+8]	; arg2: disp->ImageBase
	mov	r8,QWORD[rsi]		; arg3: disp->ControlPc
	mov	r9,QWORD[rsi+16]	; arg4: disp->FunctionEntry
	mov	r10,QWORD[rsi+40]	; disp->ContextRecord
	lea	r11,[rsi+56]		; &disp->HandlerData
	lea	r12,[rsi+24]		; &disp->EstablisherFrame
	mov	QWORD[rsp+32],r10	; arg5
	mov	QWORD[rsp+40],r11	; arg6
	mov	QWORD[rsp+48],r12	; arg7
	mov	QWORD[rsp+56],rcx	; arg8: NULL
	call	QWORD[__imp_RtlVirtualUnwind]

	mov	eax,1			; ExceptionContinueSearch
	add	rsp,64
	popfq
	pop	r15
	pop	r14
	pop	r13
	pop	r12
	pop	rbp
	pop	rbx
	pop	rdi
	pop	rsi
	DB	0F3h,0C3h		;repret
913
914
; .pdata: one function-table entry per routine -- begin address, end
; address and unwind-info address, all image-relative.
section	.pdata rdata align=4
ALIGN	4
	DD	$L$SEH_begin_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_mul4x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_mul4x_mont wrt ..imagebase

	DD	$L$SEH_begin_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_end_bn_sqr8x_mont wrt ..imagebase
	DD	$L$SEH_info_bn_sqr8x_mont wrt ..imagebase

; .xdata: unwind info per routine.  The header bytes 9,0,0,0 encode
; version 1 with the exception-handler flag set, a zero-length prolog
; and no unwind codes; unwinding is left entirely to the handler.  The
; header is followed by the handler address and the handler data: the
; body and epilogue labels that the handler compares Rip against.
section	.xdata rdata align=8
ALIGN	8
$L$SEH_info_bn_mul_mont:
DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul_body wrt ..imagebase
	DD	$L$mul_epilogue wrt ..imagebase
$L$SEH_info_bn_mul4x_mont:
DB	9,0,0,0
	DD	mul_handler wrt ..imagebase
	DD	$L$mul4x_body wrt ..imagebase
	DD	$L$mul4x_epilogue wrt ..imagebase
$L$SEH_info_bn_sqr8x_mont:
DB	9,0,0,0
	DD	sqr_handler wrt ..imagebase
	DD	$L$sqr8x_body wrt ..imagebase
	DD	$L$sqr8x_epilogue wrt ..imagebase
942