#if defined(__i386__)
/*
 * bn_mul_mont -- word-by-word Montgomery multiplication for 32-bit x86.
 *
 * Syntax: GNU as, AT&T operand order (op src,dst).
 * ABI:    i386 cdecl, all six arguments on the stack.
 *
 * Expected C prototype (taken from the usual OpenSSL-style declaration; the
 * prototype itself is not visible in this file -- confirm against the header):
 *
 *   int bn_mul_mont(uint32_t *rp, const uint32_t *ap, const uint32_t *bp,
 *                   const uint32_t *np, const uint32_t *n0, int num);
 *
 * Returns (eax): 0 if num < 4 (nothing is read or written in that case),
 *                1 otherwise, after rp[0..num-1] has been written.
 * Clobbers:      eax, ecx, edx, flags; mm0-mm7 on the SSE2 path (emms is
 *                executed before the common tail). ebp/ebx/esi/edi are
 *                saved and restored.
 *
 * Three code paths share one tail:
 *   - SSE2 path (pmuludq on MMX registers) when bit 26 of the first dword of
 *     OPENSSL_ia32cap_P is set;
 *   - scalar multiply path (.L008mull / .L0101stmadd / .L0092ndmadd);
 *   - scalar squaring path (.L007bn_sqr_mont), taken only when ap == bp and
 *     num is even.
 *
 * Frame built below the incoming stack pointer (offsets from the new esp):
 *    0  scratch (squaring path keeps num-1 here)
 *    4  rp
 *    8  ap
 *   12  bp (scalar multiply path: pointer to the current bp word;
 *           squaring path: current outer index)
 *   16  np
 *   20  n0[0] (the word n0 points to, not the pointer)
 *   24  caller esp as it was after the four register pushes
 *   28  scalar multiply path: &bp[num], the outer-loop end pointer
 *   32  tp[0 .. num+1], the temporary accumulator vector
 */
.file	"src/crypto/bn/asm/x86-mont.S"
.text
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* After four pushes: return address at 16(%esp), arguments from 20(%esp). */
	xorl	%eax,%eax		/* return value 0 for the early exit */
	movl	40(%esp),%edi		/* edi = num (6th argument) */
	cmpl	$4,%edi
	jl	.L000just_leave		/* signed compare: num < 4 -> return 0 */
	leal	20(%esp),%esi		/* esi = address of the argument block */
	leal	24(%esp),%edx		/* edx = address of the 2nd argument slot */
	movl	%esp,%ebp		/* remember esp so it can be restored at exit */
	/* Reserve 32 + 4*(num+2) bytes: frame header plus tp[]. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%esp
	negl	%edi			/* edi = num+2 again */
	/* Make esp congruent to edx modulo 2048 ... */
	movl	%esp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%esp
	/* ... then force bit 11 of esp to differ from bit 11 of edx ... */
	xorl	%esp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%esp
	/* ... and round down to a 64-byte boundary. */
	andl	$-64,%esp
	/* Copy the arguments into the new frame. */
	movl	(%esi),%eax		/* rp */
	movl	4(%esi),%ebx		/* ap */
	movl	8(%esi),%ecx		/* bp */
	movl	12(%esi),%edx		/* np */
	movl	16(%esi),%esi		/* n0 pointer */
	movl	(%esi),%esi		/* n0[0] */
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%edx,16(%esp)
	movl	%esi,20(%esp)
	leal	-3(%edi),%ebx		/* ebx = num-1 */
	movl	%ebp,24(%esp)		/* saved caller esp */
	/* call/pop yields the current EIP so the capability word can be
	 * addressed position-independently. */
	call	.L001PIC_me_up
.L001PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L001PIC_me_up(%eax),%eax
	btl	$26,(%eax)		/* CF = bit 26 of the first capability dword */
	jnc	.L002non_sse2

	/*
	 * SSE2 path.
	 * esi = ap, edi = bp, ebp = np, edx = outer index i, ecx = inner index j,
	 * ebx = num-1, mm7 = 0x00000000ffffffff mask, mm4 = bp[i],
	 * mm5 = per-iteration reduction multiplier, mm2/mm3 = running carries of
	 * the ap*bp[i] and np*m chains.
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4		/* bp[0] */
	movd	(%esi),%mm5		/* ap[0] */
	movd	(%ebp),%mm3		/* np[0] */
	pmuludq	%mm4,%mm5		/* ap[0]*bp[0] */
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0		/* low word of the product */
	pmuludq	20(%esp),%mm5		/* m = low word * n0[0] (only low dwords are used) */
	pmuludq	%mm5,%mm3		/* np[0]*m */
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1		/* np[1] */
	movd	4(%esi),%mm0		/* ap[1] */
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0031st:
	/* First pass (i = 0): tp[j-1] = low word of ap[j]*bp[0] + np[j]*m + carries. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0031st
	/* Last column of the first pass, then store the 64-bit carry sum
	 * into tp[num-1], tp[num]. */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L004outer:
	/* Outer loop over bp[i], i = 1 .. num-1: fold tp[] into the products. */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4	/* bp[i] */
	movd	(%esi),%mm5		/* ap[0] */
	movd	32(%esp),%mm6		/* tp[0] */
	movd	(%ebp),%mm3		/* np[0] */
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5		/* ap[0]*bp[i] + tp[0] */
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	pmuludq	20(%esp),%mm5		/* m = low word * n0[0] */
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6		/* tp[1] */
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	decl	%ebx			/* ebx doubles as the inner down-counter */
.L005inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	leal	1(%ecx),%ecx		/* lea keeps ZF from the decl for the jnz */
	jnz	.L005inner
	movl	%ecx,%ebx		/* ebx = num-1 again */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6	/* previous top word tp[num] */
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)	/* new tp[num-1], tp[num] */
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L004outer
	emms				/* leave MMX state before the integer tail */
	jmp	.L006common_tail

.align	16
.L002non_sse2:
	/*
	 * Scalar paths. Choose squaring when ap == bp and num is even:
	 * ebp = (num & 1) | (ap - bp) is zero exactly in that case.
	 */
	movl	8(%esp),%esi		/* ap */
	leal	1(%ebx),%ebp		/* num */
	movl	12(%esp),%edi		/* bp */
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	leal	4(%edi,%ebx,4),%eax	/* &bp[num] */
	orl	%edx,%ebp
	movl	(%edi),%edi		/* bp[0]; mov leaves ZF from the orl intact */
	jz	.L007bn_sqr_mont
	movl	%eax,28(%esp)		/* outer-loop end pointer */
	movl	(%esi),%eax		/* ap[0] */
	xorl	%edx,%edx
.align	16
.L008mull:
	/* tp[] = ap[] * bp[0] */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L008mull
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi		/* n0[0] */
	addl	%ebp,%eax
	movl	16(%esp),%esi		/* np */
	adcl	$0,%edx
	imull	32(%esp),%edi		/* m = tp[0]*n0[0] */
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)	/* clear the word above the top */
	movl	(%esi),%eax
	mull	%edi			/* np[0]*m */
	addl	32(%esp),%eax		/* only the carry is kept; eax is reloaded next */
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0092ndmadd
.align	16
.L0101stmadd:
	/* tp[] += ap[] * bp[i] for the bp word currently in edi */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0101stmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi		/* n0[0] */
	adcl	$0,%edx
	movl	16(%esp),%esi		/* np */
	addl	%eax,%ebp
	adcl	$0,%edx
	imull	32(%esp),%edi		/* m = tp[0]*n0[0] */
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi			/* np[0]*m */
	addl	32(%esp),%eax		/* only the carry is kept; eax is reloaded next */
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0092ndmadd:
	/* tp[] = (tp[] + np[] * m) shifted down by one word */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0092ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	movl	12(%esp),%ecx		/* pointer to the current bp word */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx		/* advance to the next bp word */
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx		/* reached &bp[num]? */
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	(%ecx),%edi		/* next bp word */
	movl	8(%esp),%esi		/* ap */
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0101stmadd

.align	16
.L007bn_sqr_mont:
	/*
	 * Squaring path (ap == bp, num even). On entry ecx = 0, edi = ap[0],
	 * esi = ap, ebx = num-1. 0(%esp) keeps num-1 and 12(%esp) keeps the
	 * outer index. Cross products are doubled on the fly; ebx carries the
	 * bit shifted out of the previous word.
	 */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi			/* ap[0]*ap[0] */
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L011sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp	/* 2*low word + carried-in bit */
	shrl	$31,%eax		/* bit shifted out by the doubling */
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L011sqr
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi		/* n0[0] */
	adcl	$0,%edx
	movl	16(%esp),%esi		/* np */
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi		/* m = tp[0]*n0[0] */
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi			/* np[0]*m */
	addl	32(%esp),%eax		/* only the carry is kept; eax is reloaded below */
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0123rdmadd:
	/* Reduction step unrolled by two: tp[] = (tp[] + np[] * m) shifted
	 * down by one word. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0123rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	movl	12(%esp),%ecx		/* outer index i */
	xorl	%eax,%eax
	movl	8(%esp),%esi		/* ap */
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx		/* i == num-1 -> finished */
	movl	%eax,36(%esp,%ebx,4)
	je	.L006common_tail
	movl	4(%esi,%ecx,4),%edi	/* ap[i+1] */
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)		/* i = i+1 */
	mull	%edi			/* ap[i]*ap[i] */
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx		/* lea keeps the flags of the cmpl */
	je	.L013sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L014sqradd:
	/* tp[j] += 2*ap[j]*ap[i] for j > i */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L014sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L013sqrlast:
	movl	20(%esp),%edi		/* n0[0] */
	movl	16(%esp),%esi		/* np */
	imull	32(%esp),%edi		/* m = tp[0]*n0[0] */
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi			/* np[0]*m */
	addl	32(%esp),%eax		/* only the carry is kept; eax is reloaded below */
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0123rdmadd

.align	16
.L006common_tail:
	/*
	 * Final conditional subtraction, entered with ebx = num-1.
	 * rp[] = tp[] - np[] is computed unconditionally; the borrow then
	 * decides, through a mask rather than a branch, whether rp[] keeps the
	 * difference or is overwritten with tp[].
	 */
	movl	16(%esp),%ebp		/* np */
	movl	4(%esp),%edi		/* rp */
	leal	32(%esp),%esi		/* tp */
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx		/* j = 0, also clears CF for the first sbbl */
.align	16
.L015sub:
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)	/* rp[j] = tp[j] - np[j] - borrow */
	decl	%ecx			/* dec, mov and lea all leave CF untouched */
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L015sub
	sbbl	$0,%eax			/* eax = tp[num] - borrow: the select mask */
.align	16
.L016copy:
	/* rp[i] = rp[i] ^ ((tp[i] ^ rp[i]) & eax); tp[i] is overwritten with
	 * ecx, which the subtraction loop left at -1. */
	movl	(%esi,%ebx,4),%edx
	movl	(%edi,%ebx,4),%ebp
	xorl	%ebp,%edx
	andl	%eax,%edx
	xorl	%ebp,%edx
	movl	%ecx,(%esi,%ebx,4)
	movl	%edx,(%edi,%ebx,4)
	decl	%ebx
	jge	.L016copy
	movl	24(%esp),%esp		/* drop the frame: restore the saved esp */
	movl	$1,%eax			/* return 1 */
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* NUL-terminated ASCII: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif