# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

.type beeu_mod_inverse_vartime,@function
.hidden beeu_mod_inverse_vartime
.globl beeu_mod_inverse_vartime
.hidden beeu_mod_inverse_vartime
.align 32
beeu_mod_inverse_vartime:
.cfi_startproc
        pushq   %rbp
.cfi_adjust_cfa_offset 8
.cfi_offset rbp,-16
        pushq   %r12
.cfi_adjust_cfa_offset 8
.cfi_offset r12,-24
        pushq   %r13
.cfi_adjust_cfa_offset 8
.cfi_offset r13,-32
        pushq   %r14
.cfi_adjust_cfa_offset 8
.cfi_offset r14,-40
        pushq   %r15
.cfi_adjust_cfa_offset 8
.cfi_offset r15,-48
        pushq   %rbx
.cfi_adjust_cfa_offset 8
.cfi_offset rbx,-56
        pushq   %rsi
.cfi_adjust_cfa_offset 8
.cfi_offset rsi,-64

        subq    $80,%rsp
.cfi_adjust_cfa_offset 80
        movq    %rdi,0(%rsp)

        # X = 1 (five limbs in %r8..%r11,%rdi), Y = 0 (five limbs in
        # %r12..%r15,%rbp).
        movq    $1,%r8
        xorq    %r9,%r9
        xorq    %r10,%r10
        xorq    %r11,%r11
        xorq    %rdi,%rdi

        xorq    %r12,%r12
        xorq    %r13,%r13
        xorq    %r14,%r14
        xorq    %r15,%r15
        xorq    %rbp,%rbp

        # B = |a| (from %rsi) at 48(%rsp), A = |n| (from %rdx) at 16(%rsp).
        vmovdqu 0(%rsi),%xmm0
        vmovdqu 16(%rsi),%xmm1
        vmovdqu %xmm0,48(%rsp)
        vmovdqu %xmm1,64(%rsp)

        vmovdqu 0(%rdx),%xmm0
        vmovdqu 16(%rdx),%xmm1
        vmovdqu %xmm0,16(%rsp)
        vmovdqu %xmm1,32(%rsp)

.Lbeeu_loop:
        xorq    %rbx,%rbx
        orq     48(%rsp),%rbx
        orq     56(%rsp),%rbx
        orq     64(%rsp),%rbx
        orq     72(%rsp),%rbx
        jz      .Lbeeu_loop_end

        # Strip the trailing zero bits of B, halving X modulo |n| once per
        # stripped bit.  %rcx scans the low bits of B; at most 27 bits are
        # stripped per pass.
        movq    $1,%rcx


.Lbeeu_shift_loop_XB:
        movq    %rcx,%rbx
        andq    48(%rsp),%rbx
        jnz     .Lbeeu_shift_loop_end_XB

        # If X is odd, add |n| first so the halving stays exact modulo |n|
        # (|n| must be odd).
        movq    $1,%rbx
        andq    %r8,%rbx
        jz      .Lshift1_0
        addq    0(%rdx),%r8
        adcq    8(%rdx),%r9
        adcq    16(%rdx),%r10
        adcq    24(%rdx),%r11
        adcq    $0,%rdi

.Lshift1_0:
        shrdq   $1,%r9,%r8
        shrdq   $1,%r10,%r9
        shrdq   $1,%r11,%r10
        shrdq   $1,%rdi,%r11
        shrq    $1,%rdi

        shlq    $1,%rcx

        # Cap the shift count at 27 bits per pass.
        cmpq    $0x8000000,%rcx
        jne     .Lbeeu_shift_loop_XB

.Lbeeu_shift_loop_end_XB:
        bsfq    %rcx,%rcx
        testq   %rcx,%rcx
        jz      .Lbeeu_no_shift_XB

        # Shift B right by %cl, the number of times X was halved.
        movq    8+48(%rsp),%rax
        movq    16+48(%rsp),%rbx
        movq    24+48(%rsp),%rsi

        shrdq   %cl,%rax,0+48(%rsp)
        shrdq   %cl,%rbx,8+48(%rsp)
        shrdq   %cl,%rsi,16+48(%rsp)

        shrq    %cl,%rsi
        movq    %rsi,24+48(%rsp)


.Lbeeu_no_shift_XB:
        # Same procedure for A and Y: strip the trailing zero bits of A,
        # halving Y modulo |n| once per stripped bit.
        movq    $1,%rcx


.Lbeeu_shift_loop_YA:
        movq    %rcx,%rbx
        andq    16(%rsp),%rbx
        jnz     .Lbeeu_shift_loop_end_YA

        movq    $1,%rbx
        andq    %r12,%rbx
        jz      .Lshift1_1
        addq    0(%rdx),%r12
        adcq    8(%rdx),%r13
        adcq    16(%rdx),%r14
        adcq    24(%rdx),%r15
        adcq    $0,%rbp

.Lshift1_1:
        shrdq   $1,%r13,%r12
        shrdq   $1,%r14,%r13
        shrdq   $1,%r15,%r14
        shrdq   $1,%rbp,%r15
        shrq    $1,%rbp

        shlq    $1,%rcx

        cmpq    $0x8000000,%rcx
        jne     .Lbeeu_shift_loop_YA

.Lbeeu_shift_loop_end_YA:
        bsfq    %rcx,%rcx
        testq   %rcx,%rcx
        jz      .Lbeeu_no_shift_YA

        movq    8+16(%rsp),%rax
        movq    16+16(%rsp),%rbx
        movq    24+16(%rsp),%rsi

        shrdq   %cl,%rax,0+16(%rsp)
        shrdq   %cl,%rbx,8+16(%rsp)
        shrdq   %cl,%rsi,16+16(%rsp)

        shrq    %cl,%rsi
        movq    %rsi,24+16(%rsp)


.Lbeeu_no_shift_YA:
        # Compute B - A to decide which of the two is larger.
        movq    48(%rsp),%rax
        movq    56(%rsp),%rbx
        movq    64(%rsp),%rsi
        movq    72(%rsp),%rcx
        subq    16(%rsp),%rax
        sbbq    24(%rsp),%rbx
        sbbq    32(%rsp),%rsi
        sbbq    40(%rsp),%rcx
        jnc     .Lbeeu_B_bigger_than_A

        # B < A: set A = A - B and Y = Y + X.
        movq    16(%rsp),%rax
        movq    24(%rsp),%rbx
        movq    32(%rsp),%rsi
        movq    40(%rsp),%rcx
        subq    48(%rsp),%rax
        sbbq    56(%rsp),%rbx
        sbbq    64(%rsp),%rsi
        sbbq    72(%rsp),%rcx
        movq    %rax,16(%rsp)
        movq    %rbx,24(%rsp)
        movq    %rsi,32(%rsp)
        movq    %rcx,40(%rsp)

        addq    %r8,%r12
        adcq    %r9,%r13
        adcq    %r10,%r14
        adcq    %r11,%r15
        adcq    %rdi,%rbp
        jmp     .Lbeeu_loop

.Lbeeu_B_bigger_than_A:
        # B >= A: set B = B - A and X = X + Y.
        movq    %rax,48(%rsp)
        movq    %rbx,56(%rsp)
        movq    %rsi,64(%rsp)
        movq    %rcx,72(%rsp)

        addq    %r12,%r8
        adcq    %r13,%r9
        adcq    %r14,%r10
        adcq    %r15,%r11
        adcq    %rbp,%rdi

        jmp     .Lbeeu_loop

.Lbeeu_loop_end:
        # B == 0, so A now holds gcd(|a|, |n|); fail unless the gcd is 1.
        movq    16(%rsp),%rbx
        subq    $1,%rbx
        orq     24(%rsp),%rbx
        orq     32(%rsp),%rbx
        orq     40(%rsp),%rbx

        jnz     .Lbeeu_err

        # Reduce Y modulo |n|, then return |n| - Y, which is |a|^-1 mod |n|.
        movq    0(%rdx),%r8
        movq    8(%rdx),%r9
        movq    16(%rdx),%r10
        movq    24(%rdx),%r11
        xorq    %rdi,%rdi

.Lbeeu_reduction_loop:
        movq    %r12,16(%rsp)
        movq    %r13,24(%rsp)
        movq    %r14,32(%rsp)
        movq    %r15,40(%rsp)
        movq    %rbp,48(%rsp)

        subq    %r8,%r12
        sbbq    %r9,%r13
        sbbq    %r10,%r14
        sbbq    %r11,%r15
        sbbq    $0,%rbp

        # Keep subtracting |n| until Y underflows, then restore the last
        # non-negative value.
        cmovcq  16(%rsp),%r12
        cmovcq  24(%rsp),%r13
        cmovcq  32(%rsp),%r14
        cmovcq  40(%rsp),%r15
        jnc     .Lbeeu_reduction_loop

        # out = |n| - Y.
        subq    %r12,%r8
        sbbq    %r13,%r9
        sbbq    %r14,%r10
        sbbq    %r15,%r11

.Lbeeu_save:
        # Store the result through the output pointer saved at 0(%rsp).
        movq    0(%rsp),%rdi

        movq    %r8,0(%rdi)
        movq    %r9,8(%rdi)
        movq    %r10,16(%rdi)
        movq    %r11,24(%rdi)

        # Return 1 on success.
        movq    $1,%rax
        jmp     .Lbeeu_finish

.Lbeeu_err:
        # Return 0: no inverse exists.
        xorq    %rax,%rax

.Lbeeu_finish:
        addq    $80,%rsp
.cfi_adjust_cfa_offset -80
        popq    %rsi
.cfi_adjust_cfa_offset -8
.cfi_restore rsi
        popq    %rbx
.cfi_adjust_cfa_offset -8
.cfi_restore rbx
        popq    %r15
.cfi_adjust_cfa_offset -8
.cfi_restore r15
        popq    %r14
.cfi_adjust_cfa_offset -8
.cfi_restore r14
        popq    %r13
.cfi_adjust_cfa_offset -8
.cfi_restore r13
        popq    %r12
.cfi_adjust_cfa_offset -8
.cfi_restore r12
        popq    %rbp
.cfi_adjust_cfa_offset -8
.cfi_restore rbp
        .byte   0xf3,0xc3
.cfi_endproc

.size beeu_mod_inverse_vartime, .-beeu_mod_inverse_vartime
#endif
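# Calling convention, inferred from the register and memory usage above (the
# authoritative declaration lives in BoringSSL's C headers, not here): each
# operand is four 64-bit little-endian limbs (256 bits), |n| must be odd, and
# the return value in %rax is 1 on success or 0 when no inverse exists.  A
# minimal C usage sketch, assuming that prototype:
#
#   int beeu_mod_inverse_vartime(uint64_t out[4], const uint64_t a[4],
#                                const uint64_t n[4]);
#
#   uint64_t inv[4];
#   if (!beeu_mod_inverse_vartime(inv, a, n)) {
#     /* gcd(a, n) != 1, so a has no inverse modulo n. */
#   }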