# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

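# beeu_mod_inverse_vartime computes the inverse of a modulo n using the
# binary extended GCD (Euclidean) algorithm, in variable time.  The three
# arguments are pointers to 256-bit values stored as four 64-bit limbs,
# least-significant limb first: out in %rdi, a in %rsi, n in %rdx (SysV ABI).
# Returns one in %rax on success and zero if a has no inverse modulo n.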
.type	beeu_mod_inverse_vartime,@function
.hidden	beeu_mod_inverse_vartime
.globl	beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime
.align	32
beeu_mod_inverse_vartime:
.cfi_startproc
	pushq	%rbp
.cfi_adjust_cfa_offset	8
.cfi_offset	rbp,-16
	pushq	%r12
.cfi_adjust_cfa_offset	8
.cfi_offset	r12,-24
	pushq	%r13
.cfi_adjust_cfa_offset	8
.cfi_offset	r13,-32
	pushq	%r14
.cfi_adjust_cfa_offset	8
.cfi_offset	r14,-40
	pushq	%r15
.cfi_adjust_cfa_offset	8
.cfi_offset	r15,-48
	pushq	%rbx
.cfi_adjust_cfa_offset	8
.cfi_offset	rbx,-56
	pushq	%rsi
.cfi_adjust_cfa_offset	8
.cfi_offset	rsi,-64

	subq	$80,%rsp
.cfi_adjust_cfa_offset	80
	movq	%rdi,0(%rsp)

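# Register and stack layout for the binary extended GCD:
#   X  = %r8,%r9,%r10,%r11 with overflow limb %rdi   (starts at 1)
#   Y  = %r12,%r13,%r14,%r15 with overflow limb %rbp (starts at 0)
#   B  = 48(%rsp)..72(%rsp)                          (starts as a)
#   A  = 16(%rsp)..40(%rsp)                          (starts as n)
# The saved out pointer lives at 0(%rsp).  The loop below maintains
# a*X == B (mod n) and a*Y == -A (mod n).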
	movq	$1,%r8
	xorq	%r9,%r9
	xorq	%r10,%r10
	xorq	%r11,%r11
	xorq	%rdi,%rdi

	xorq	%r12,%r12
	xorq	%r13,%r13
	xorq	%r14,%r14
	xorq	%r15,%r15
	xorq	%rbp,%rbp

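# Copy the inputs into the scratch frame: B = a (from %rsi), A = n (from %rdx).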
	vmovdqu	0(%rsi),%xmm0
	vmovdqu	16(%rsi),%xmm1
	vmovdqu	%xmm0,48(%rsp)
	vmovdqu	%xmm1,64(%rsp)

	vmovdqu	0(%rdx),%xmm0
	vmovdqu	16(%rdx),%xmm1
	vmovdqu	%xmm0,16(%rsp)
	vmovdqu	%xmm1,32(%rsp)

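# Main loop: run until B reaches zero.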
.Lbeeu_loop:
	xorq	%rbx,%rbx
	orq	48(%rsp),%rbx
	orq	56(%rsp),%rbx
	orq	64(%rsp),%rbx
	orq	72(%rsp),%rbx
	jz	.Lbeeu_loop_end

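# Strip the trailing zero bits from B.  %rcx carries a single set bit that
# walks up from bit 0 and stops at the first set bit of B (at most 27
# positions per pass).  For each bit skipped, X is halved mod n: if X is
# odd, n is added first so the division by two is exact.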
	movq	$1,%rcx

.Lbeeu_shift_loop_XB:
	movq	%rcx,%rbx
	andq	48(%rsp),%rbx
	jnz	.Lbeeu_shift_loop_end_XB

	movq	$1,%rbx
	andq	%r8,%rbx
	jz	.Lshift1_0
	addq	0(%rdx),%r8
	adcq	8(%rdx),%r9
	adcq	16(%rdx),%r10
	adcq	24(%rdx),%r11
	adcq	$0,%rdi

.Lshift1_0:
	shrdq	$1,%r9,%r8
	shrdq	$1,%r10,%r9
	shrdq	$1,%r11,%r10
	shrdq	$1,%rdi,%r11
	shrq	$1,%rdi

	shlq	$1,%rcx

	cmpq	$0x8000000,%rcx
	jne	.Lbeeu_shift_loop_XB

.Lbeeu_shift_loop_end_XB:
	bsfq	%rcx,%rcx
	testq	%rcx,%rcx
	jz	.Lbeeu_no_shift_XB

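# Shift B right by %cl bits (%rcx now holds the trailing-zero count from bsf).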
	movq	8+48(%rsp),%rax
	movq	16+48(%rsp),%rbx
	movq	24+48(%rsp),%rsi

	shrdq	%cl,%rax,0+48(%rsp)
	shrdq	%cl,%rbx,8+48(%rsp)
	shrdq	%cl,%rsi,16+48(%rsp)

	shrq	%cl,%rsi
	movq	%rsi,24+48(%rsp)

.Lbeeu_no_shift_XB:

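# Repeat the trailing-zero removal for A, halving Y mod n for each bit
# shifted out of A.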
	movq	$1,%rcx

.Lbeeu_shift_loop_YA:
	movq	%rcx,%rbx
	andq	16(%rsp),%rbx
	jnz	.Lbeeu_shift_loop_end_YA

	movq	$1,%rbx
	andq	%r12,%rbx
	jz	.Lshift1_1
	addq	0(%rdx),%r12
	adcq	8(%rdx),%r13
	adcq	16(%rdx),%r14
	adcq	24(%rdx),%r15
	adcq	$0,%rbp

.Lshift1_1:
	shrdq	$1,%r13,%r12
	shrdq	$1,%r14,%r13
	shrdq	$1,%r15,%r14
	shrdq	$1,%rbp,%r15
	shrq	$1,%rbp

	shlq	$1,%rcx

	cmpq	$0x8000000,%rcx
	jne	.Lbeeu_shift_loop_YA

.Lbeeu_shift_loop_end_YA:
	bsfq	%rcx,%rcx
	testq	%rcx,%rcx
	jz	.Lbeeu_no_shift_YA

	movq	8+16(%rsp),%rax
	movq	16+16(%rsp),%rbx
	movq	24+16(%rsp),%rsi

	shrdq	%cl,%rax,0+16(%rsp)
	shrdq	%cl,%rbx,8+16(%rsp)
	shrdq	%cl,%rsi,16+16(%rsp)

	shrq	%cl,%rsi
	movq	%rsi,24+16(%rsp)

.Lbeeu_no_shift_YA:

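# Compare B and A by computing B - A across the four limbs.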
	movq	48(%rsp),%rax
	movq	56(%rsp),%rbx
	movq	64(%rsp),%rsi
	movq	72(%rsp),%rcx
	subq	16(%rsp),%rax
	sbbq	24(%rsp),%rbx
	sbbq	32(%rsp),%rsi
	sbbq	40(%rsp),%rcx
	jnc	.Lbeeu_B_bigger_than_A

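# B < A: set A = A - B and Y = Y + X.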
	movq	16(%rsp),%rax
	movq	24(%rsp),%rbx
	movq	32(%rsp),%rsi
	movq	40(%rsp),%rcx
	subq	48(%rsp),%rax
	sbbq	56(%rsp),%rbx
	sbbq	64(%rsp),%rsi
	sbbq	72(%rsp),%rcx
	movq	%rax,16(%rsp)
	movq	%rbx,24(%rsp)
	movq	%rsi,32(%rsp)
	movq	%rcx,40(%rsp)

	addq	%r8,%r12
	adcq	%r9,%r13
	adcq	%r10,%r14
	adcq	%r11,%r15
	adcq	%rdi,%rbp
	jmp	.Lbeeu_loop

.Lbeeu_B_bigger_than_A:

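# B >= A: set B = B - A (the difference is already in %rax,%rbx,%rsi,%rcx)
# and X = X + Y.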
	movq	%rax,48(%rsp)
	movq	%rbx,56(%rsp)
	movq	%rsi,64(%rsp)
	movq	%rcx,72(%rsp)

	addq	%r12,%r8
	adcq	%r13,%r9
	adcq	%r14,%r10
	adcq	%r15,%r11
	adcq	%rbp,%rdi

	jmp	.Lbeeu_loop

.Lbeeu_loop_end:

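# B is now zero, so A holds gcd(a, n).  The inverse exists only if that gcd
# is one.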
	movq	16(%rsp),%rbx
	subq	$1,%rbx
	orq	24(%rsp),%rbx
	orq	32(%rsp),%rbx
	orq	40(%rsp),%rbx

	jnz	.Lbeeu_err

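# Here a*Y == -1 (mod n).  Load n into %r8..%r11 and reduce Y below n, so
# that n - Y is the fully reduced inverse of a.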
	movq	0(%rdx),%r8
	movq	8(%rdx),%r9
	movq	16(%rdx),%r10
	movq	24(%rdx),%r11
	xorq	%rdi,%rdi

.Lbeeu_reduction_loop:
	movq	%r12,16(%rsp)
	movq	%r13,24(%rsp)
	movq	%r14,32(%rsp)
	movq	%r15,40(%rsp)
	movq	%rbp,48(%rsp)

	subq	%r8,%r12
	sbbq	%r9,%r13
	sbbq	%r10,%r14
	sbbq	%r11,%r15
	sbbq	$0,%rbp

	cmovcq	16(%rsp),%r12
	cmovcq	24(%rsp),%r13
	cmovcq	32(%rsp),%r14
	cmovcq	40(%rsp),%r15
	jnc	.Lbeeu_reduction_loop

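# The inverse is n - Y; compute it and store it through the saved out pointer.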
	subq	%r12,%r8
	sbbq	%r13,%r9
	sbbq	%r14,%r10
	sbbq	%r15,%r11

.Lbeeu_save:
	movq	0(%rsp),%rdi

	movq	%r8,0(%rdi)
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)

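# Success: return one.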
	movq	$1,%rax
	jmp	.Lbeeu_finish

.Lbeeu_err:

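# A (the gcd) is not one, so a has no inverse modulo n: return zero.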
	xorq	%rax,%rax

.Lbeeu_finish:
	addq	$80,%rsp
.cfi_adjust_cfa_offset	-80
	popq	%rsi
.cfi_adjust_cfa_offset	-8
.cfi_restore	rsi
	popq	%rbx
.cfi_adjust_cfa_offset	-8
.cfi_restore	rbx
	popq	%r15
.cfi_adjust_cfa_offset	-8
.cfi_restore	r15
	popq	%r14
.cfi_adjust_cfa_offset	-8
.cfi_restore	r14
	popq	%r13
.cfi_adjust_cfa_offset	-8
.cfi_restore	r13
	popq	%r12
.cfi_adjust_cfa_offset	-8
.cfi_restore	r12
	popq	%rbp
.cfi_adjust_cfa_offset	-8
.cfi_restore	rbp
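# The two bytes below encode the rep ret return sequence.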
.byte	0xf3,0xc3
.cfi_endproc

.size	beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif