#if defined(__i386__)
/*
 * Syntax: GNU as, AT&T operand order.  Target: 32-bit x86, ELF.
 * This file is run through the C preprocessor, so only C-style
 * comments are used here.
 *
 * NOTE(review): the label numbering (.L000..., .L001...) suggests this
 * is generated output; if so, durable changes belong in the generator.
 *
 * bn_mul_mont -- word-by-word Montgomery multiplication.
 *
 * Calling convention: all six arguments are read from the stack and
 * the result is returned in %eax.  The argument names below follow the
 * usual OpenSSL-style prototype -- TODO confirm against the C header:
 *
 *   int bn_mul_mont(uint32_t *rp, const uint32_t *ap,
 *                   const uint32_t *bp, const uint32_t *np,
 *                   const uint32_t *n0, int num);
 *
 * Returns: 0 in %eax, without touching rp, when num < 4 (signed
 *          compare); 1 in %eax otherwise.
 * Saved and restored: %ebp, %ebx, %esi, %edi.
 * Clobbered: %eax, %ecx, %edx, flags; %mm0-%mm7 on the SSE2 path
 *          (emms is executed before that path leaves).
 *
 * Private frame (offsets from the relocated %esp):
 *    0  num-1                           (squaring path only)
 *    4  rp
 *    8  ap
 *   12  bp; the mul path advances it as a pointer, the squaring path
 *       reuses the slot as the outer index i
 *   16  np
 *   20  *n0 (the word itself, not the pointer)
 *   24  caller's %esp, restored on exit
 *   28  &bp[num]                        (mul path only)
 *   32  tp[0 .. num+1], scratch accumulator
 */
.file	"src/crypto/bn/asm/x86-mont.S"
.text
.globl	bn_mul_mont
.hidden	bn_mul_mont
.type	bn_mul_mont,@function
.align	16
bn_mul_mont:
.L_bn_mul_mont_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* eax = 0 is the return value if we bail out right away. */
	xorl	%eax,%eax
	/* Four pushes + return address: argument k lives at 16+4k(%esp). */
	movl	40(%esp),%edi
	cmpl	$4,%edi
	jl	.L000just_leave
	/* esi = address of the argument block (slot of argument 1). */
	leal	20(%esp),%esi
	/* edx = address of the stack slot holding argument 2 (not its value). */
	leal	24(%esp),%edx
	/* ebp = esp - 32 - 4*(num+2): room for the frame header plus tp[]. */
	addl	$2,%edi
	negl	%edi
	leal	-32(%esp,%edi,4),%ebp
	negl	%edi
	/*
	 * Make ebp congruent to edx modulo 2048, then force bit 11 of the
	 * two addresses to differ, then round down to a 64-byte boundary.
	 * NOTE(review): presumably a cache-placement heuristic -- confirm.
	 */
	movl	%ebp,%eax
	subl	%edx,%eax
	andl	$2047,%eax
	subl	%eax,%ebp
	xorl	%ebp,%edx
	andl	$2048,%edx
	xorl	$2048,%edx
	subl	%edx,%ebp
	andl	$-64,%ebp
	/*
	 * Move esp down to ebp in steps of at most 4096 bytes, reading one
	 * word at every step so each intervening page is touched in order.
	 * edx keeps the caller's esp for the epilogue.
	 */
	movl	%esp,%eax
	subl	%ebp,%eax
	andl	$-4096,%eax
	movl	%esp,%edx
	leal	(%ebp,%eax,1),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
	jmp	.L002page_walk_done
.align	16
.L001page_walk:
	leal	-4096(%esp),%esp
	movl	(%esp),%eax
	cmpl	%ebp,%esp
	ja	.L001page_walk
.L002page_walk_done:
	/* Copy the arguments into the private frame (see layout above). */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%ebp
	movl	16(%esi),%esi
	/* Dereference argument 5: the frame keeps the n0 word itself. */
	movl	(%esi),%esi
	movl	%eax,4(%esp)
	movl	%ebx,8(%esp)
	movl	%ecx,12(%esp)
	movl	%ebp,16(%esp)
	movl	%esi,20(%esp)
	/* edi still holds num+2, so ebx = num-1 (index of the top word). */
	leal	-3(%edi),%ebx
	movl	%edx,24(%esp)
	/* Position-independent lookup of OPENSSL_ia32cap_P via call/pop. */
	call	.L003PIC_me_up
.L003PIC_me_up:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.L003PIC_me_up(%eax),%eax
	/*
	 * Bit 26 of the first capability word selects the code path; per
	 * the label name it is the SSE2 feature bit.
	 */
	btl	$26,(%eax)
	jnc	.L004non_sse2

	/*
	 * ---- SSE2 path: 32x32->64 multiplies via pmuludq on MMX regs ----
	 * mm7 = 0x00000000ffffffff (low-word mask)
	 * esi = ap, edi = bp, ebp = np, edx = outer index i, ecx = inner j
	 * mm4 = bp[i]; low word of mm5 = per-pass reduction multiplier
	 * mm2 = running carry of ap[]*bp[i], mm3 = running carry of np[]*m
	 */
	movl	$-1,%eax
	movd	%eax,%mm7
	movl	8(%esp),%esi
	movl	12(%esp),%edi
	movl	16(%esp),%ebp
	xorl	%edx,%edx
	xorl	%ecx,%ecx
	movd	(%edi),%mm4
	movd	(%esi),%mm5
	movd	(%ebp),%mm3
	/* mm5 = ap[0]*bp[0]; keep the full product in mm2, low word in mm0. */
	pmuludq	%mm4,%mm5
	movq	%mm5,%mm2
	movq	%mm5,%mm0
	pand	%mm7,%mm0
	/* Low word of mm5 = low32(ap[0]*bp[0]) * n0 = multiplier m. */
	pmuludq	20(%esp),%mm5
	/* mm3 = np[0]*m + low word; only its upper half (carry) is kept. */
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	incl	%ecx
.align	16
.L0051st:
	/* First pass, j = 1 .. num-2: tp[j-1] = low(ap[j]*bp[0] + np[j]*m + carries). */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	leal	1(%ecx),%ecx
	cmpl	%ebx,%ecx
	jl	.L0051st
	/* Last word of the first pass (j = num-1). */
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	/* 64-bit store of the two carries: tp[num-1] and tp[num]. */
	paddq	%mm2,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	incl	%edx
.L006outer:
	/* Outer pass for i = 1 .. num-1: tp = (tp + ap*bp[i] + np*m) >> 32. */
	xorl	%ecx,%ecx
	movd	(%edi,%edx,4),%mm4
	movd	(%esi),%mm5
	movd	32(%esp),%mm6
	movd	(%ebp),%mm3
	pmuludq	%mm4,%mm5
	paddq	%mm6,%mm5
	movq	%mm5,%mm0
	movq	%mm5,%mm2
	pand	%mm7,%mm0
	/* New multiplier m for this pass, from low32(ap[0]*bp[i] + tp[0]). */
	pmuludq	20(%esp),%mm5
	pmuludq	%mm5,%mm3
	paddq	%mm0,%mm3
	movd	36(%esp),%mm6
	movd	4(%ebp),%mm1
	movd	4(%esi),%mm0
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	incl	%ecx
	/* ebx counts down to 0 as the inner loop counter. */
	decl	%ebx
.L007inner:
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	movd	36(%esp,%ecx,4),%mm6
	pand	%mm7,%mm0
	movd	4(%ebp,%ecx,4),%mm1
	paddq	%mm0,%mm3
	movd	4(%esi,%ecx,4),%mm0
	psrlq	$32,%mm2
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm3
	paddq	%mm6,%mm2
	decl	%ebx
	/* leal leaves the flags from decl intact for the jnz below. */
	leal	1(%ecx),%ecx
	jnz	.L007inner
	/* ecx has reached num-1 here; restore ebx = num-1 from it. */
	movl	%ecx,%ebx
	pmuludq	%mm4,%mm0
	pmuludq	%mm5,%mm1
	paddq	%mm0,%mm2
	paddq	%mm1,%mm3
	movq	%mm2,%mm0
	pand	%mm7,%mm0
	paddq	%mm0,%mm3
	movd	%mm3,28(%esp,%ecx,4)
	psrlq	$32,%mm2
	psrlq	$32,%mm3
	movd	36(%esp,%ebx,4),%mm6
	paddq	%mm2,%mm3
	paddq	%mm6,%mm3
	movq	%mm3,32(%esp,%ebx,4)
	leal	1(%edx),%edx
	cmpl	%ebx,%edx
	jle	.L006outer
	/* Leave MMX state before returning to x87-capable code. */
	emms
	jmp	.L008common_tail

.align	16
.L004non_sse2:
	/*
	 * ---- Integer path ----
	 * esi = ap, edi = bp.  ebp = (num & 1) | (ap - bp) is zero exactly
	 * when ap == bp and num is even; that case takes the dedicated
	 * squaring code.  The movl before jz does not alter the flags set
	 * by orl.
	 */
	movl	8(%esp),%esi
	leal	1(%ebx),%ebp
	movl	12(%esp),%edi
	xorl	%ecx,%ecx
	movl	%esi,%edx
	andl	$1,%ebp
	subl	%edi,%edx
	/* eax = &bp[num], the end marker for the outer loop. */
	leal	4(%edi,%ebx,4),%eax
	orl	%edx,%ebp
	movl	(%edi),%edi
	jz	.L009bn_sqr_mont
	movl	%eax,28(%esp)
	movl	(%esi),%eax
	xorl	%edx,%edx
.align	16
.L010mull:
	/* tp[j] = low(ap[j]*bp[0] + carry), j = 0 .. num-2; carry in edx. */
	movl	%edx,%ebp
	mull	%edi
	addl	%eax,%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	movl	(%esi,%ecx,4),%eax
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L010mull
	/* Top word, then set up the reduction: edi = n0*tp[0] (= m), esi = np. */
	movl	%edx,%ebp
	mull	%edi
	movl	20(%esp),%edi
	addl	%ebp,%eax
	movl	16(%esp),%esi
	adcl	$0,%edx
	imull	32(%esp),%edi
	movl	%eax,32(%esp,%ebx,4)
	xorl	%ecx,%ecx
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	movl	(%esi),%eax
	mull	%edi
	/* Only the carry of tp[0] + low(np[0]*m) is kept; eax is reloaded. */
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	incl	%ecx
	jmp	.L0112ndmadd
.align	16
.L0121stmadd:
	/* tp[j] = low(tp[j] + ap[j]*bp[i] + carry), j = 0 .. num-2. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L0121stmadd
	/* Top word plus carry propagation into tp[num] and tp[num+1]. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	addl	%eax,%ebp
	adcl	$0,%edx
	/* edi = n0*tp[0] (= m for this pass); esi = np. */
	imull	32(%esp),%edi
	xorl	%ecx,%ecx
	addl	36(%esp,%ebx,4),%edx
	movl	%ebp,32(%esp,%ebx,4)
	adcl	$0,%ecx
	movl	(%esi),%eax
	movl	%edx,36(%esp,%ebx,4)
	movl	%ecx,40(%esp,%ebx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	4(%esi),%eax
	adcl	$0,%edx
	movl	$1,%ecx
.align	16
.L0112ndmadd:
	/* Reduction: tp[j-1] = low(tp[j] + np[j]*m + carry), j = 1 .. num-2. */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0112ndmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	xorl	%eax,%eax
	/* Advance the saved bp pointer; finish once it reaches &bp[num]. */
	movl	12(%esp),%ecx
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	leal	4(%ecx),%ecx
	movl	%edx,32(%esp,%ebx,4)
	cmpl	28(%esp),%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	/* Next outer pass: edi = bp[i], esi = ap, j = 0, carry = 0. */
	movl	(%ecx),%edi
	movl	8(%esp),%esi
	movl	%ecx,12(%esp)
	xorl	%ecx,%ecx
	xorl	%edx,%edx
	movl	(%esi),%eax
	jmp	.L0121stmadd

.align	16
.L009bn_sqr_mont:
	/*
	 * ---- Squaring path (ap == bp, num even) ----
	 * 0(%esp) = num-1, 12(%esp) = outer index i (starts at 0).
	 * Products ap[j]*ap[i] are doubled on the fly: the
	 * leal (..,%eax,2) / shrl $31 pairs shift each product left by one
	 * bit and carry the shifted-out bit in ebx.
	 */
	movl	%ebx,(%esp)
	movl	%ecx,12(%esp)
	movl	%edi,%eax
	mull	%edi
	movl	%eax,32(%esp)
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
	incl	%ecx
.align	16
.L013sqr:
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	1(%ecx),%ecx
	adcl	$0,%edx
	leal	(%ebx,%eax,2),%ebp
	shrl	$31,%eax
	cmpl	(%esp),%ecx
	movl	%eax,%ebx
	movl	%ebp,28(%esp,%ecx,4)
	jl	.L013sqr
	/* Top word of the first squaring pass; then edi = n0*tp[0], esi = np. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	movl	20(%esp),%edi
	adcl	$0,%edx
	movl	16(%esp),%esi
	leal	(%ebx,%eax,2),%ebp
	imull	32(%esp),%edi
	shrl	$31,%eax
	movl	%ebp,32(%esp,%ecx,4)
	leal	(%eax,%edx,2),%ebp
	movl	(%esi),%eax
	shrl	$31,%edx
	movl	%ebp,36(%esp,%ecx,4)
	movl	%edx,40(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	movl	%ecx,%ebx
	adcl	$0,%edx
	movl	4(%esi),%eax
	movl	$1,%ecx
.align	16
.L0143rdmadd:
	/* Reduction by np*m, two words per iteration (ecx advances by 2). */
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ecx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	4(%esi,%ecx,4),%eax
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%edx,%ebp
	mull	%edi
	addl	36(%esp,%ecx,4),%ebp
	leal	2(%ecx),%ecx
	adcl	$0,%edx
	addl	%eax,%ebp
	movl	(%esi,%ecx,4),%eax
	adcl	$0,%edx
	cmpl	%ebx,%ecx
	movl	%ebp,24(%esp,%ecx,4)
	jl	.L0143rdmadd
	movl	%edx,%ebp
	mull	%edi
	addl	32(%esp,%ebx,4),%ebp
	adcl	$0,%edx
	addl	%eax,%ebp
	adcl	$0,%edx
	movl	%ebp,28(%esp,%ebx,4)
	/* ecx = outer index i; done when i == num-1. */
	movl	12(%esp),%ecx
	xorl	%eax,%eax
	movl	8(%esp),%esi
	addl	36(%esp,%ebx,4),%edx
	adcl	40(%esp,%ebx,4),%eax
	movl	%edx,32(%esp,%ebx,4)
	cmpl	%ebx,%ecx
	movl	%eax,36(%esp,%ebx,4)
	je	.L008common_tail
	/* i++, edi = ap[i]; add the diagonal term ap[i]^2 into tp[i]. */
	movl	4(%esi,%ecx,4),%edi
	leal	1(%ecx),%ecx
	movl	%edi,%eax
	movl	%ecx,12(%esp)
	mull	%edi
	addl	32(%esp,%ecx,4),%eax
	adcl	$0,%edx
	movl	%eax,32(%esp,%ecx,4)
	xorl	%ebp,%ebp
	/* If i is already num-1 there are no cross terms left. */
	cmpl	%ebx,%ecx
	leal	1(%ecx),%ecx
	je	.L015sqrlast
	movl	%edx,%ebx
	shrl	$1,%edx
	andl	$1,%ebx
.align	16
.L016sqradd:
	/* tp[j] += 2*ap[j]*ap[i] for j = i+1 .. num-1, with carries. */
	movl	(%esi,%ecx,4),%eax
	movl	%edx,%ebp
	mull	%edi
	addl	%ebp,%eax
	leal	(%eax,%eax,1),%ebp
	adcl	$0,%edx
	shrl	$31,%eax
	addl	32(%esp,%ecx,4),%ebp
	leal	1(%ecx),%ecx
	adcl	$0,%eax
	addl	%ebx,%ebp
	adcl	$0,%eax
	cmpl	(%esp),%ecx
	movl	%ebp,28(%esp,%ecx,4)
	movl	%eax,%ebx
	jle	.L016sqradd
	movl	%edx,%ebp
	addl	%edx,%edx
	shrl	$31,%ebp
	addl	%ebx,%edx
	adcl	$0,%ebp
.L015sqrlast:
	/* Fold the final carries into tp, then go reduce: edi = n0*tp[0]. */
	movl	20(%esp),%edi
	movl	16(%esp),%esi
	imull	32(%esp),%edi
	addl	32(%esp,%ecx,4),%edx
	movl	(%esi),%eax
	adcl	$0,%ebp
	movl	%edx,32(%esp,%ecx,4)
	movl	%ebp,36(%esp,%ecx,4)
	mull	%edi
	addl	32(%esp),%eax
	leal	-1(%ecx),%ebx
	adcl	$0,%edx
	movl	$1,%ecx
	movl	4(%esi),%eax
	jmp	.L0143rdmadd

.align	16
.L008common_tail:
	/*
	 * ---- Final conditional subtraction, shared by all paths ----
	 * ebp = np, edi = rp, esi = tp, ebx = num-1.
	 * xorl clears CF for the first sbbl; decl and leal do not modify
	 * CF, so the borrow chains across iterations, while jge tests the
	 * sign/overflow flags produced by decl.
	 */
	movl	16(%esp),%ebp
	movl	4(%esp),%edi
	leal	32(%esp),%esi
	movl	(%esi),%eax
	movl	%ebx,%ecx
	xorl	%edx,%edx
.align	16
.L017sub:
	/* rp[j] = tp[j] - np[j] - borrow, for j = 0 .. num-1. */
	sbbl	(%ebp,%edx,4),%eax
	movl	%eax,(%edi,%edx,4)
	decl	%ecx
	movl	4(%esi,%edx,4),%eax
	leal	1(%edx),%edx
	jge	.L017sub
	/*
	 * eax = tp[num] - borrow.  Assuming this is 0 or all-ones, the
	 * and/not/or sequence selects esi = tp when it is all-ones (the
	 * subtraction underflowed) and esi = rp otherwise, without a
	 * branch.
	 */
	sbbl	$0,%eax
	andl	%eax,%esi
	notl	%eax
	movl	%edi,%ebp
	andl	%eax,%ebp
	orl	%ebp,%esi
.align	16
.L018copy:
	/*
	 * rp[k] = selected[k] for k = num-1 .. 0, and overwrite the scratch
	 * word tp[k] with the value left in ecx by the subtraction loop.
	 */
	movl	(%esi,%ebx,4),%eax
	movl	%eax,(%edi,%ebx,4)
	movl	%ecx,32(%esp,%ebx,4)
	decl	%ebx
	jge	.L018copy
	/* Restore the caller's stack pointer saved at frame setup. */
	movl	24(%esp),%esp
	movl	$1,%eax
.L000just_leave:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	bn_mul_mont,.-.L_bn_mul_mont_begin
/* ASCII, NUL-terminated: "Montgomery Multiplication for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
.byte	112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
.byte	54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
.byte	32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
.byte	111,114,103,62,0
#endif