/*
 * Montgomery multiplication for 32-bit x86 (AT&T syntax, Mach-O symbol
 * naming).  The NUL-terminated .byte string after the code spells
 * "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>".
 *
 * Entry point: _bn_mul_mont.  Six 32-bit stack arguments; after the four
 * pushes in the prologue the first one is at 20(%esp).  The names below
 * are reviewer shorthand inferred from how each value is used; this file
 * does not name them:
 *   arg1 rp   word vector the result is stored to (L015sub / L016copy)
 *   arg2 ap   first multiplicand
 *   arg3 bp   second multiplicand
 *   arg4 np   vector multiplied by m and subtracted in the tail (modulus)
 *   arg5 n0   pointer; only the 32-bit word it points at is read
 *   arg6 num  signed word count
 *
 * Returns in %eax: 0 when num < 4 (no vector is touched), otherwise 1.
 * %ebp, %ebx, %esi, %edi are saved and restored.  The SSE2 path uses
 * %mm0-%mm7 and executes emms before it leaves.
 *
 * Frame after setup (%esp is 64-byte aligned):
 *    0(%esp)  num-1 (written by the squaring path only)
 *    4(%esp)  rp
 *    8(%esp)  ap
 *   12(%esp)  bp; later the running pointer into b (multiply path) or
 *             the outer index i (squaring path)
 *   16(%esp)  np
 *   20(%esp)  the word n0 points at
 *   24(%esp)  %esp as it was right after the four pushes
 *   28(%esp)  bp + 4*num, end marker for the non-SSE2 multiply path
 *   32(%esp)  t[0 .. num+1], 32-bit words of the running sum
 *
 * NOTE(review): the frame is carved out with a single lea, without
 * touching the pages in between - confirm callers bound num.
 * NOTE(review): many instructions below depend on CF/ZF surviving the
 * lea/mov/dec/inc that sit between producer and consumer; do not
 * reorder or substitute flag-writing instructions.
 */
#if defined(__i386__)
.file	"src/crypto/bn/asm/x86-mont.S"
.text
.globl	_bn_mul_mont
.private_extern	_bn_mul_mont
.align	4
_bn_mul_mont:
L_bn_mul_mont_begin:
        pushl   %ebp
        pushl   %ebx
        pushl   %esi
        pushl   %edi
        xorl    %eax,%eax               /* return value 0 for the early exit */
        movl    40(%esp),%edi           /* edi = num (arg6) */
        cmpl    $4,%edi
        jl      L000just_leave          /* signed num < 4: return 0 */
        leal    20(%esp),%esi           /* esi -> arg1 slot */
        leal    24(%esp),%edx           /* edx -> arg2 slot */
        movl    %esp,%ebp               /* remember esp for the epilogue */
        addl    $2,%edi
        negl    %edi
        leal    -32(%esp,%edi,4),%esp   /* esp -= 32 + 4*(num+2) */
        negl    %edi                    /* edi = num+2 again */
        /*
         * Frame placement relative to the argument area: first give esp
         * the same low 11 bits as edx, then subtract 2048 when bit 11 of
         * esp and edx agree, then round down to 64 bytes.
         * Presumably a cache/aliasing measure - not established here.
         */
        movl    %esp,%eax
        subl    %edx,%eax
        andl    $2047,%eax
        subl    %eax,%esp
        xorl    %esp,%edx
        andl    $2048,%edx
        xorl    $2048,%edx
        subl    %edx,%esp
        andl    $-64,%esp
        /* Copy the arguments into the new frame. */
        movl    (%esi),%eax             /* rp */
        movl    4(%esi),%ebx            /* ap */
        movl    8(%esi),%ecx            /* bp */
        movl    12(%esi),%edx           /* np */
        movl    16(%esi),%esi           /* n0 pointer ... */
        movl    (%esi),%esi             /* ... dereferenced once */
        movl    %eax,4(%esp)
        movl    %ebx,8(%esp)
        movl    %ecx,12(%esp)
        movl    %edx,16(%esp)
        movl    %esi,20(%esp)
        leal    -3(%edi),%ebx           /* ebx = num-1 */
        movl    %ebp,24(%esp)           /* saved esp */
        /* Position-independent load of the _OPENSSL_ia32cap_P address. */
        call    L001PIC_me_up
L001PIC_me_up:
        popl    %eax
        movl    L_OPENSSL_ia32cap_P$non_lazy_ptr-L001PIC_me_up(%eax),%eax
        btl     $26,(%eax)              /* bit 26 of the first word; the branch label implies SSE2 */
        jnc     L002non_sse2

        /*
         * SSE2 path (pmuludq on MMX registers).
         * mm7 = 0x00000000ffffffff mask, mm4 = b[i], mm5 = m,
         * mm2 = carry of a*b[i], mm3 = carry of n*m,
         * edx = i, ecx = j, ebx = num-1.
         */
        movl    $-1,%eax
        movd    %eax,%mm7
        movl    8(%esp),%esi            /* esi = ap */
        movl    12(%esp),%edi           /* edi = bp */
        movl    16(%esp),%ebp           /* ebp = np */
        xorl    %edx,%edx               /* i = 0 */
        xorl    %ecx,%ecx               /* j = 0 */
        movd    (%edi),%mm4             /* b[0] */
        movd    (%esi),%mm5             /* a[0] */
        movd    (%ebp),%mm3             /* n[0] */
        pmuludq %mm4,%mm5               /* a[0]*b[0] */
        movq    %mm5,%mm2
        movq    %mm5,%mm0
        pand    %mm7,%mm0               /* low word of a[0]*b[0] */
        pmuludq 20(%esp),%mm5           /* m = low word * n0 word */
        pmuludq %mm5,%mm3               /* n[0]*m */
        paddq   %mm0,%mm3
        movd    4(%ebp),%mm1            /* n[1] */
        movd    4(%esi),%mm0            /* a[1] */
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        incl    %ecx                    /* j = 1 */
.align	4,0x90
L0031st:
        /* First pass: t[j-1] = low word of a[j]*b[0] + n[j]*m + carries. */
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        movd    4(%ebp,%ecx,4),%mm1     /* n[j+1] */
        paddq   %mm0,%mm3
        movd    4(%esi,%ecx,4),%mm0     /* a[j+1] */
        psrlq   $32,%mm2
        movd    %mm3,28(%esp,%ecx,4)    /* t[j-1] */
        psrlq   $32,%mm3
        leal    1(%ecx),%ecx
        cmpl    %ebx,%ecx
        jl      L0031st
        /* Last word j = num-1 (no further loads). */
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        paddq   %mm0,%mm3
        movd    %mm3,28(%esp,%ecx,4)    /* t[num-2] */
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        paddq   %mm2,%mm3
        movq    %mm3,32(%esp,%ebx,4)    /* 64-bit store: t[num-1], t[num] */
        incl    %edx                    /* i = 1 */
L004outer:
        /* Outer loop over b[i], i = 1 .. num-1: t = (t + a*b[i] + n*m) >> 32. */
        xorl    %ecx,%ecx
        movd    (%edi,%edx,4),%mm4      /* b[i] */
        movd    (%esi),%mm5             /* a[0] */
        movd    32(%esp),%mm6           /* t[0] */
        movd    (%ebp),%mm3             /* n[0] */
        pmuludq %mm4,%mm5
        paddq   %mm6,%mm5               /* a[0]*b[i] + t[0] */
        movq    %mm5,%mm0
        movq    %mm5,%mm2
        pand    %mm7,%mm0
        pmuludq 20(%esp),%mm5           /* m for this round */
        pmuludq %mm5,%mm3
        paddq   %mm0,%mm3
        movd    36(%esp),%mm6           /* t[1] */
        movd    4(%ebp),%mm1            /* n[1] */
        movd    4(%esi),%mm0            /* a[1] */
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        paddq   %mm6,%mm2
        incl    %ecx                    /* j = 1 */
        decl    %ebx                    /* ebx becomes a down-counter (num-2) */
L005inner:
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        movd    36(%esp,%ecx,4),%mm6    /* t[j+1] */
        pand    %mm7,%mm0
        movd    4(%ebp,%ecx,4),%mm1     /* n[j+1] */
        paddq   %mm0,%mm3
        movd    4(%esi,%ecx,4),%mm0     /* a[j+1] */
        psrlq   $32,%mm2
        movd    %mm3,28(%esp,%ecx,4)    /* t[j-1] */
        psrlq   $32,%mm3
        paddq   %mm6,%mm2
        decl    %ebx
        leal    1(%ecx),%ecx            /* lea keeps ZF from decl for jnz */
        jnz     L005inner
        movl    %ecx,%ebx               /* ecx == num-1 here: restore ebx */
        pmuludq %mm4,%mm0
        pmuludq %mm5,%mm1
        paddq   %mm0,%mm2
        paddq   %mm1,%mm3
        movq    %mm2,%mm0
        pand    %mm7,%mm0
        paddq   %mm0,%mm3
        movd    %mm3,28(%esp,%ecx,4)    /* t[num-2] */
        psrlq   $32,%mm2
        psrlq   $32,%mm3
        movd    36(%esp,%ebx,4),%mm6    /* old t[num] */
        paddq   %mm2,%mm3
        paddq   %mm6,%mm3
        movq    %mm3,32(%esp,%ebx,4)    /* t[num-1], t[num] */
        leal    1(%edx),%edx
        cmpl    %ebx,%edx
        jle     L004outer               /* while i <= num-1 */
        emms
        jmp     L006common_tail
.align	4,0x90
L002non_sse2:
        /*
         * Integer path.  Takes the squaring branch when ap == bp and
         * num is even: ebp = (num & 1) | (ap - bp), zero only then.
         */
        movl    8(%esp),%esi            /* esi = ap */
        leal    1(%ebx),%ebp            /* ebp = num */
        movl    12(%esp),%edi           /* edi = bp */
        xorl    %ecx,%ecx               /* j = 0 */
        movl    %esi,%edx
        andl    $1,%ebp
        subl    %edi,%edx
        leal    4(%edi,%ebx,4),%eax     /* eax = bp + 4*num */
        orl     %edx,%ebp
        movl    (%edi),%edi             /* edi = b[0]; mov keeps ZF from orl */
        jz      L007bn_sqr_mont
        movl    %eax,28(%esp)           /* end marker for the b pointer */
        movl    (%esi),%eax             /* a[0] */
        xorl    %edx,%edx               /* carry = 0 */
.align	4,0x90
L008mull:
        /* t[j] = low word of a[j]*b[0] + carry, j = 0 .. num-2. */
        movl    %edx,%ebp
        mull    %edi
        addl    %eax,%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        movl    (%esi,%ecx,4),%eax      /* next a[j] */
        cmpl    %ebx,%ecx
        movl    %ebp,28(%esp,%ecx,4)
        jl      L008mull
        /* Last word, then set up the first reduction round. */
        movl    %edx,%ebp
        mull    %edi
        movl    20(%esp),%edi           /* n0 word */
        addl    %ebp,%eax
        movl    16(%esp),%esi           /* esi = np */
        adcl    $0,%edx
        imull   32(%esp),%edi           /* m = n0 word * t[0] (low 32 bits) */
        movl    %eax,32(%esp,%ebx,4)    /* t[num-1] */
        xorl    %ecx,%ecx
        movl    %edx,36(%esp,%ebx,4)    /* t[num] */
        movl    %ecx,40(%esp,%ebx,4)    /* t[num+1] = 0 */
        movl    (%esi),%eax             /* n[0] */
        mull    %edi
        addl    32(%esp),%eax           /* sum discarded; only CF is kept */
        movl    4(%esi),%eax            /* n[1] */
        adcl    $0,%edx
        incl    %ecx                    /* j = 1 */
        jmp     L0092ndmadd
.align	4,0x90
L0101stmadd:
        /* t[j] += a[j]*b[i] with carry, j = 0 .. num-2. */
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,28(%esp,%ecx,4)
        jl      L0101stmadd
        /* Last word; fold carries into t[num], t[num+1]; compute next m. */
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%eax
        movl    20(%esp),%edi
        adcl    $0,%edx
        movl    16(%esp),%esi           /* esi = np */
        addl    %eax,%ebp
        adcl    $0,%edx
        imull   32(%esp),%edi           /* m = n0 word * t[0] */
        xorl    %ecx,%ecx
        addl    36(%esp,%ebx,4),%edx
        movl    %ebp,32(%esp,%ebx,4)
        adcl    $0,%ecx
        movl    (%esi),%eax
        movl    %edx,36(%esp,%ebx,4)
        movl    %ecx,40(%esp,%ebx,4)
        mull    %edi
        addl    32(%esp),%eax           /* sum discarded; only CF is kept */
        movl    4(%esi),%eax
        adcl    $0,%edx
        movl    $1,%ecx
.align	4,0x90
L0092ndmadd:
        /* Reduction: t[j-1] = low word of t[j] + n[j]*m + carry (shifts t down one word). */
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,24(%esp,%ecx,4)
        jl      L0092ndmadd
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ebx,4)    /* t[num-2] */
        xorl    %eax,%eax
        movl    12(%esp),%ecx           /* current b pointer */
        addl    36(%esp,%ebx,4),%edx
        adcl    40(%esp,%ebx,4),%eax
        leal    4(%ecx),%ecx            /* advance to the next b word */
        movl    %edx,32(%esp,%ebx,4)    /* t[num-1] */
        cmpl    28(%esp),%ecx           /* reached bp + 4*num? */
        movl    %eax,36(%esp,%ebx,4)    /* t[num] */
        je      L006common_tail
        movl    (%ecx),%edi             /* edi = b[i] */
        movl    8(%esp),%esi            /* esi = ap */
        movl    %ecx,12(%esp)
        xorl    %ecx,%ecx
        xorl    %edx,%edx
        movl    (%esi),%eax
        jmp     L0101stmadd
.align	4,0x90
L007bn_sqr_mont:
        /*
         * Squaring path (ap == bp, num even).  Cross products a[j]*a[i]
         * are computed once and doubled; the bit shifted out by each
         * doubling travels in ebx.
         */
        movl    %ebx,(%esp)             /* 0(%esp) = num-1 */
        movl    %ecx,12(%esp)           /* i = 0 */
        movl    %edi,%eax
        mull    %edi                    /* a[0]^2 */
        movl    %eax,32(%esp)           /* t[0] */
        /* Split the high word: hi>>1 joins the next product before it is doubled, hi&1 is added after. */
        movl    %edx,%ebx
        shrl    $1,%edx
        andl    $1,%ebx
        incl    %ecx
.align	4,0x90
L011sqr:
        /* t[j] = 2*(a[j]*a[0] + carry) + ebx, j = 1 .. num-2. */
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        leal    1(%ecx),%ecx
        adcl    $0,%edx
        leal    (%ebx,%eax,2),%ebp
        shrl    $31,%eax
        cmpl    (%esp),%ecx
        movl    %eax,%ebx
        movl    %ebp,28(%esp,%ecx,4)
        jl      L011sqr
        /* Last product a[num-1]*a[0]; also prepare the first reduction. */
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        movl    20(%esp),%edi
        adcl    $0,%edx
        movl    16(%esp),%esi           /* esi = np */
        leal    (%ebx,%eax,2),%ebp
        imull   32(%esp),%edi           /* m = n0 word * t[0] */
        shrl    $31,%eax
        movl    %ebp,32(%esp,%ecx,4)    /* t[num-1] */
        leal    (%eax,%edx,2),%ebp
        movl    (%esi),%eax
        shrl    $31,%edx
        movl    %ebp,36(%esp,%ecx,4)    /* t[num] */
        movl    %edx,40(%esp,%ecx,4)    /* t[num+1] */
        mull    %edi
        addl    32(%esp),%eax           /* sum discarded; only CF is kept */
        movl    %ecx,%ebx               /* ebx = num-1 again */
        adcl    $0,%edx
        movl    4(%esi),%eax
        movl    $1,%ecx
.align	4,0x90
L0123rdmadd:
        /* Reduction, two words per iteration; ecx runs 1, 3, 5, ... up to num-1. */
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ecx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    4(%esi,%ecx,4),%eax
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ecx,4)
        movl    %edx,%ebp
        mull    %edi
        addl    36(%esp,%ecx,4),%ebp
        leal    2(%ecx),%ecx
        adcl    $0,%edx
        addl    %eax,%ebp
        movl    (%esi,%ecx,4),%eax
        adcl    $0,%edx
        cmpl    %ebx,%ecx
        movl    %ebp,24(%esp,%ecx,4)
        jl      L0123rdmadd
        movl    %edx,%ebp
        mull    %edi
        addl    32(%esp,%ebx,4),%ebp
        adcl    $0,%edx
        addl    %eax,%ebp
        adcl    $0,%edx
        movl    %ebp,28(%esp,%ebx,4)    /* t[num-2] */
        movl    12(%esp),%ecx           /* i */
        xorl    %eax,%eax
        movl    8(%esp),%esi            /* esi = ap */
        addl    36(%esp,%ebx,4),%edx
        adcl    40(%esp,%ebx,4),%eax
        movl    %edx,32(%esp,%ebx,4)    /* t[num-1] */
        cmpl    %ebx,%ecx
        movl    %eax,36(%esp,%ebx,4)    /* t[num] */
        je      L006common_tail         /* i == num-1: all rounds done */
        /* Next round: square a[i+1] and add it to t[i+1]. */
        movl    4(%esi,%ecx,4),%edi
        leal    1(%ecx),%ecx
        movl    %edi,%eax
        movl    %ecx,12(%esp)           /* i advanced by one */
        mull    %edi
        addl    32(%esp,%ecx,4),%eax
        adcl    $0,%edx
        movl    %eax,32(%esp,%ecx,4)
        xorl    %ebp,%ebp
        cmpl    %ebx,%ecx
        leal    1(%ecx),%ecx            /* lea keeps ZF from cmpl for je */
        je      L013sqrlast             /* no cross products left */
        movl    %edx,%ebx
        shrl    $1,%edx
        andl    $1,%ebx
.align	4,0x90
L014sqradd:
        /* t[j] += 2*(a[j]*a[i] + carry) + ebx for the remaining j up to num-1. */
        movl    (%esi,%ecx,4),%eax
        movl    %edx,%ebp
        mull    %edi
        addl    %ebp,%eax
        leal    (%eax,%eax,1),%ebp
        adcl    $0,%edx
        shrl    $31,%eax
        addl    32(%esp,%ecx,4),%ebp
        leal    1(%ecx),%ecx
        adcl    $0,%eax
        addl    %ebx,%ebp
        adcl    $0,%eax
        cmpl    (%esp),%ecx
        movl    %ebp,28(%esp,%ecx,4)
        movl    %eax,%ebx
        jle     L014sqradd
        movl    %edx,%ebp
        addl    %edx,%edx
        shrl    $31,%ebp
        addl    %ebx,%edx
        adcl    $0,%ebp
L013sqrlast:
        /* Fold the top carry into t[num], t[num+1]; start the next reduction. */
        movl    20(%esp),%edi
        movl    16(%esp),%esi           /* esi = np */
        imull   32(%esp),%edi           /* m = n0 word * t[0] */
        addl    32(%esp,%ecx,4),%edx
        movl    (%esi),%eax
        adcl    $0,%ebp
        movl    %edx,32(%esp,%ecx,4)
        movl    %ebp,36(%esp,%ecx,4)
        mull    %edi
        addl    32(%esp),%eax           /* sum discarded; only CF is kept */
        leal    -1(%ecx),%ebx           /* ebx = num-1 */
        adcl    $0,%edx
        movl    $1,%ecx
        movl    4(%esi),%eax
        jmp     L0123rdmadd
.align	4,0x90
L006common_tail:
        /*
         * Shared epilogue.  rp = t - n word by word, then a branch-free
         * select between t and that difference, then the frame is left.
         */
        movl    16(%esp),%ebp           /* ebp = np */
        movl    4(%esp),%edi            /* edi = rp */
        leal    32(%esp),%esi           /* esi = &t[0] */
        movl    (%esi),%eax
        movl    %ebx,%ecx               /* ecx = num-1 */
        xorl    %edx,%edx               /* index 0; also clears CF for the first sbb */
.align	4,0x90
L015sub:
        sbbl    (%ebp,%edx,4),%eax
        movl    %eax,(%edi,%edx,4)
        decl    %ecx                    /* dec leaves CF (the borrow) intact */
        movl    4(%esi,%edx,4),%eax
        leal    1(%edx),%edx
        jge     L015sub                 /* num iterations */
        sbbl    $0,%eax                 /* eax = t[num] - borrow, used as AND mask */
.align	4,0x90
L016copy:
        /* rp[j] = ((t[j] ^ rp[j]) & mask) ^ rp[j]; t[j] is overwritten with ecx (-1 after the loop above). */
        movl    (%esi,%ebx,4),%edx
        movl    (%edi,%ebx,4),%ebp
        xorl    %ebp,%edx
        andl    %eax,%edx
        xorl    %ebp,%edx
        movl    %ecx,(%esi,%ebx,4)
        movl    %edx,(%edi,%ebx,4)
        decl    %ebx
        jge     L016copy
        movl    24(%esp),%esp           /* back to the post-push stack pointer */
        movl    $1,%eax                 /* return 1 */
L000just_leave:
        popl    %edi
        popl    %esi
        popl    %ebx
        popl    %ebp
        ret
/* "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
/* Non-lazy pointer slot through which the code above reaches _OPENSSL_ia32cap_P. */
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif