; This file is generated from a similarly-named Perl script in the BoringSSL
; source tree. Do not edit by hand.

%ifdef BORINGSSL_PREFIX
%include "boringssl_prefix_symbols_nasm.inc"
%endif
; Select the code-section declaration that matches the assembler output format.
%ifidn __OUTPUT_FORMAT__,obj
section code use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes @feat.00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section .text code align=64
%else
section .text code
%endif
; No extern is needed: _OPENSSL_ia32cap_P is declared as a common symbol in
; the .bss segment at the end of this file.
;extern _OPENSSL_ia32cap_P
;-----------------------------------------------------------------------------
; _bn_mul_mont -- Montgomery multiplication on vectors of 32-bit words
; (see the identification string after the routine).
;
; Syntax/target: NASM, 32-bit x86. All six arguments are read from the stack
; and the final `ret` pops nothing.
;
; Stack arguments as seen by the caller (names below are this comment's
; shorthand for the slots):
;   arg1  rp   destination vector, written in L$007common_tail
;   arg2  ap   first multiplicand vector
;   arg3  bp   second multiplicand vector
;   arg4  np   vector multiplied by the per-round factor and conditionally
;              subtracted from the result at the end
;   arg5  n0   pointer; only the dword it points at is read
;   arg6  num  number of 32-bit words per vector
; NOTE(review): presumably these are rp, ap, bp, np, n0, num of the C
; prototype of bn_mul_mont -- confirm against the C declaration.
;
; Returns: eax = 0 (nothing written) when num is less than 4 (signed compare),
;          eax = 1 otherwise.
; Preserves ebp, ebx, esi, edi (pushed on entry, popped on exit); esp is
; restored from the frame. The SSE2 path uses mm0-mm7 and ends with emms.
;
; Private frame after allocation (offsets from the new esp):
;   [0]   squaring path only: loop bound num-1
;   [4]   rp
;   [8]   ap
;   [12]  bp (generic path: advanced one word per outer round;
;             squaring path: reused as the outer index)
;   [16]  np
;   [20]  the dword loaded through arg5
;   [24]  esp value from before the frame was allocated
;   [28]  generic path only: address of bp[num] (end marker)
;   [32]  tp[0..num+1], the running temporary vector
;-----------------------------------------------------------------------------
global _bn_mul_mont
align 16
_bn_mul_mont:
L$_bn_mul_mont_begin:
	push ebp
	push ebx
	push esi
	push edi
	xor eax,eax	; return value for the early-out below
	mov edi,DWORD [40+esp]	; edi = num (4 pushes + return address = 20 bytes before arg1)
	cmp edi,4
	jl NEAR L$000just_leave	; num less than 4: return 0
	lea esi,[20+esp]	; esi = address of the arg1 slot
	lea edx,[24+esp]	; edx = address of the arg2 slot (an address on the stack, not the ap value)
	add edi,2	; edi = num+2
	neg edi
	lea ebp,[edi*4+esp-32]	; ebp = esp - 32 - 4*(num+2): candidate frame base
	neg edi	; edi = num+2 again
	; Adjust ebp so that it is congruent to edx modulo 2048 ...
	mov eax,ebp
	sub eax,edx
	and eax,2047
	sub ebp,eax
	; ... and so that bit 11 of ebp differs from bit 11 of edx
	; (ebp is lowered by another 2048 when the two bits are equal).
	xor edx,ebp
	and edx,2048
	xor edx,2048
	sub ebp,edx
	and ebp,-64	; round the frame base down to a 64-byte boundary
	mov eax,esp
	sub eax,ebp
	and eax,-4096	; eax = distance to the new frame, rounded down to a multiple of 4096
	mov edx,esp	; edx = esp before allocation, stored at [24+esp] below
	lea esp,[eax*1+ebp]	; first stop: a whole number of 4096-byte steps above ebp
	mov eax,DWORD [esp]	; touch the stack at this address
	cmp esp,ebp
	ja NEAR L$001page_walk
	jmp NEAR L$002page_walk_done
align 16
; Step esp down 4096 bytes at a time, reading one dword per step, until it
; reaches ebp. NOTE(review): presumably so that stack guard pages are touched
; in order -- confirm.
L$001page_walk:
	lea esp,[esp-4096]
	mov eax,DWORD [esp]
	cmp esp,ebp
	ja NEAR L$001page_walk
L$002page_walk_done:
	; Copy the arguments into the private frame (esi still points at arg1).
	mov eax,DWORD [esi]	; rp
	mov ebx,DWORD [4+esi]	; ap
	mov ecx,DWORD [8+esi]	; bp
	mov ebp,DWORD [12+esi]	; np
	mov esi,DWORD [16+esi]	; arg5 pointer
	mov esi,DWORD [esi]	; the dword it points at (n0)
	mov DWORD [4+esp],eax
	mov DWORD [8+esp],ebx
	mov DWORD [12+esp],ecx
	mov DWORD [16+esp],ebp
	mov DWORD [20+esp],esi
	lea ebx,[edi-3]	; ebx = (num+2)-3 = num-1, used as loop bound throughout
	mov DWORD [24+esp],edx	; remember the pre-allocation esp
	lea eax,[_OPENSSL_ia32cap_P]
	bt DWORD [eax],26	; bit 26 of the first capability dword selects the SSE2 path
	jnc NEAR L$003non_sse2
	; ---- SSE2 path: 32x32->64 multiplies with pmuludq on MMX registers ----
	; mm7 = low-dword mask, mm4 = current bp word, mm5 = per-round factor,
	; mm2 = carry of the ap*bp chain, mm3 = carry of the np*factor chain.
	mov eax,-1
	movd mm7,eax	; mm7 = 0x00000000FFFFFFFF
	mov esi,DWORD [8+esp]	; esi = ap
	mov edi,DWORD [12+esp]	; edi = bp
	mov ebp,DWORD [16+esp]	; ebp = np
	xor edx,edx	; edx = outer index i = 0
	xor ecx,ecx	; ecx = inner index j = 0
	movd mm4,DWORD [edi]	; bp[0]
	movd mm5,DWORD [esi]	; ap[0]
	movd mm3,DWORD [ebp]	; np[0]
	pmuludq mm5,mm4	; ap[0]*bp[0]
	movq mm2,mm5
	movq mm0,mm5
	pand mm0,mm7	; low dword of the product
	pmuludq mm5,[20+esp]	; factor = low dword of product * n0 (pmuludq uses the low dwords)
	pmuludq mm3,mm5	; np[0]*factor
	paddq mm3,mm0
	movd mm1,DWORD [4+ebp]	; preload np[1]
	movd mm0,DWORD [4+esi]	; preload ap[1]
	psrlq mm2,32	; keep only the carries; the low dword of word 0 is not stored
	psrlq mm3,32
	inc ecx	; j = 1
align 16
; First round (i = 0): for j = 1..num-2 store word j of
; ap*bp[0] + np*factor at tp[j-1], preloading ap[j+1]/np[j+1] as it goes.
L$0041st:
	pmuludq mm0,mm4
	pmuludq mm1,mm5
	paddq mm2,mm0
	paddq mm3,mm1
	movq mm0,mm2
	pand mm0,mm7
	movd mm1,DWORD [4+ecx*4+ebp]
	paddq mm3,mm0
	movd mm0,DWORD [4+ecx*4+esi]
	psrlq mm2,32
	movd DWORD [28+ecx*4+esp],mm3	; tp[j-1]
	psrlq mm3,32
	lea ecx,[1+ecx]
	cmp ecx,ebx
	jl NEAR L$0041st
	; Last word (j = num-1), no further preloads.
	pmuludq mm0,mm4
	pmuludq mm1,mm5
	paddq mm2,mm0
	paddq mm3,mm1
	movq mm0,mm2
	pand mm0,mm7
	paddq mm3,mm0
	movd DWORD [28+ecx*4+esp],mm3	; tp[num-2]
	psrlq mm2,32
	psrlq mm3,32
	paddq mm3,mm2	; combine both carries
	movq [32+ebx*4+esp],mm3	; 64-bit store: tp[num-1] and tp[num]
	inc edx	; i = 1
; Remaining rounds: tp = (tp + ap*bp[i] + np*factor) shifted down one word.
L$005outer:
	xor ecx,ecx
	movd mm4,DWORD [edx*4+edi]	; bp[i]
	movd mm5,DWORD [esi]	; ap[0]
	movd mm6,DWORD [32+esp]	; tp[0]
	movd mm3,DWORD [ebp]	; np[0]
	pmuludq mm5,mm4
	paddq mm5,mm6	; ap[0]*bp[i] + tp[0]
	movq mm0,mm5
	movq mm2,mm5
	pand mm0,mm7
	pmuludq mm5,[20+esp]	; factor for this round
	pmuludq mm3,mm5
	paddq mm3,mm0
	movd mm6,DWORD [36+esp]	; tp[1]
	movd mm1,DWORD [4+ebp]
	movd mm0,DWORD [4+esi]
	psrlq mm2,32
	psrlq mm3,32
	paddq mm2,mm6
	inc ecx
	dec ebx	; ebx = num-2, counts the inner iterations down to zero
L$006inner:
	pmuludq mm0,mm4
	pmuludq mm1,mm5
	paddq mm2,mm0
	paddq mm3,mm1
	movq mm0,mm2
	movd mm6,DWORD [36+ecx*4+esp]	; tp[j+1] for the next iteration
	pand mm0,mm7
	movd mm1,DWORD [4+ecx*4+ebp]
	paddq mm3,mm0
	movd mm0,DWORD [4+ecx*4+esi]
	psrlq mm2,32
	movd DWORD [28+ecx*4+esp],mm3	; tp[j-1]
	psrlq mm3,32
	paddq mm2,mm6
	dec ebx
	lea ecx,[1+ecx]	; lea leaves the flags from dec intact for the jnz
	jnz NEAR L$006inner
	mov ebx,ecx	; ecx = num-1 here: restore ebx to num-1
	pmuludq mm0,mm4
	pmuludq mm1,mm5
	paddq mm2,mm0
	paddq mm3,mm1
	movq mm0,mm2
	pand mm0,mm7
	paddq mm3,mm0
	movd DWORD [28+ecx*4+esp],mm3	; tp[num-2]
	psrlq mm2,32
	psrlq mm3,32
	movd mm6,DWORD [36+ebx*4+esp]	; previous tp[num]
	paddq mm3,mm2
	paddq mm3,mm6
	movq [32+ebx*4+esp],mm3	; tp[num-1] and tp[num]
	lea edx,[1+edx]
	cmp edx,ebx
	jle NEAR L$005outer	; continue while i is at most num-1
	emms	; leave MMX state before returning to the caller
	jmp NEAR L$007common_tail
align 16
; ---- Integer-only path ----
L$003non_sse2:
	mov esi,DWORD [8+esp]	; esi = ap
	lea ebp,[1+ebx]	; ebp = num
	mov edi,DWORD [12+esp]	; edi = bp
	xor ecx,ecx
	mov edx,esi
	and ebp,1	; ebp = num & 1
	sub edx,edi	; edx = ap - bp
	lea eax,[4+ebx*4+edi]	; eax = address of bp[num]
	or ebp,edx	; zero only if num is even and ap == bp
	mov edi,DWORD [edi]	; edi = bp[0] (mov does not disturb the flags from `or`)
	jz NEAR L$008bn_sqr_mont	; take the dedicated squaring path
	mov DWORD [28+esp],eax	; end marker for the bp walk
	mov eax,DWORD [esi]	; ap[0]
	xor edx,edx	; carry = 0
align 16
; tp[j] = low word of ap[j]*bp[0] + carry, for j = 0..num-2.
L$009mull:
	mov ebp,edx
	mul edi
	add ebp,eax
	lea ecx,[1+ecx]
	adc edx,0
	mov eax,DWORD [ecx*4+esi]
	cmp ecx,ebx
	mov DWORD [28+ecx*4+esp],ebp
	jl NEAR L$009mull
	; Last word of ap*bp[0], then set up the np*factor pass.
	mov ebp,edx
	mul edi
	mov edi,DWORD [20+esp]	; n0
	add eax,ebp
	mov esi,DWORD [16+esp]	; esi = np
	adc edx,0
	imul edi,DWORD [32+esp]	; edi = factor = n0 * tp[0] (low 32 bits)
	mov DWORD [32+ebx*4+esp],eax	; tp[num-1]
	xor ecx,ecx
	mov DWORD [36+ebx*4+esp],edx	; tp[num]
	mov DWORD [40+ebx*4+esp],ecx	; tp[num+1] = 0
	mov eax,DWORD [esi]	; np[0]
	mul edi
	add eax,DWORD [32+esp]	; only the carry out is used; eax is overwritten next
	mov eax,DWORD [4+esi]	; np[1]
	adc edx,0
	inc ecx
	jmp NEAR L$0102ndmadd
align 16
; tp[j] += ap[j]*bp[i] (with carry), for j = 0..num-2.
; Entered with edi = bp[i], esi = ap, eax = ap[0], ecx = 0, edx = 0.
L$0111stmadd:
	mov ebp,edx
	mul edi
	add ebp,DWORD [32+ecx*4+esp]
	lea ecx,[1+ecx]
	adc edx,0
	add ebp,eax
	mov eax,DWORD [ecx*4+esi]
	adc edx,0
	cmp ecx,ebx
	mov DWORD [28+ecx*4+esp],ebp
	jl NEAR L$0111stmadd
	; Last word, then set up the np*factor pass for this round.
	mov ebp,edx
	mul edi
	add eax,DWORD [32+ebx*4+esp]
	mov edi,DWORD [20+esp]	; n0
	adc edx,0
	mov esi,DWORD [16+esp]	; esi = np
	add ebp,eax
	adc edx,0
	imul edi,DWORD [32+esp]	; factor = n0 * tp[0]
	xor ecx,ecx
	add edx,DWORD [36+ebx*4+esp]
	mov DWORD [32+ebx*4+esp],ebp	; tp[num-1]
	adc ecx,0
	mov eax,DWORD [esi]	; np[0]
	mov DWORD [36+ebx*4+esp],edx	; tp[num]
	mov DWORD [40+ebx*4+esp],ecx	; tp[num+1] = carry out
	mul edi
	add eax,DWORD [32+esp]	; only the carry out is used
	mov eax,DWORD [4+esi]	; np[1]
	adc edx,0
	mov ecx,1
align 16
; tp[j-1] = low word of tp[j] + np[j]*factor + carry, for j = 1..num-2
; (the result is stored one word lower than it was read).
L$0102ndmadd:
	mov ebp,edx
	mul edi
	add ebp,DWORD [32+ecx*4+esp]
	lea ecx,[1+ecx]
	adc edx,0
	add ebp,eax
	mov eax,DWORD [ecx*4+esi]
	adc edx,0
	cmp ecx,ebx
	mov DWORD [24+ecx*4+esp],ebp
	jl NEAR L$0102ndmadd
	mov ebp,edx
	mul edi
	add ebp,DWORD [32+ebx*4+esp]
	adc edx,0
	add ebp,eax
	adc edx,0
	mov DWORD [28+ebx*4+esp],ebp	; tp[num-2]
	xor eax,eax
	mov ecx,DWORD [12+esp]	; current position in bp
	add edx,DWORD [36+ebx*4+esp]
	adc eax,DWORD [40+ebx*4+esp]
	lea ecx,[4+ecx]	; advance to the next bp word
	mov DWORD [32+ebx*4+esp],edx	; tp[num-1]
	cmp ecx,DWORD [28+esp]	; reached bp[num]?
	mov DWORD [36+ebx*4+esp],eax	; tp[num]
	je NEAR L$007common_tail
	mov edi,DWORD [ecx]	; edi = next bp word
	mov esi,DWORD [8+esp]	; esi = ap
	mov DWORD [12+esp],ecx
	xor ecx,ecx
	xor edx,edx
	mov eax,DWORD [esi]
	jmp NEAR L$0111stmadd
align 16
; ---- Squaring path (ap == bp and num even) ----
; Entered with esi = ap, edi = ap[0], ecx = 0, ebx = num-1.
; Cross products are doubled on the fly: the running carry is kept halved in
; edx and the bit shifted out of each doubling is carried in ebx.
L$008bn_sqr_mont:
	mov DWORD [esp],ebx	; [esp] = num-1, inner loop bound
	mov DWORD [12+esp],ecx	; [12+esp] = outer index i = 0
	mov eax,edi
	mul edi	; ap[0]^2
	mov DWORD [32+esp],eax	; tp[0]
	mov ebx,edx
	shr edx,1
	and ebx,1
	inc ecx
align 16
L$012sqr:
	mov eax,DWORD [ecx*4+esi]
	mov ebp,edx
	mul edi
	add eax,ebp
	lea ecx,[1+ecx]
	adc edx,0
	lea ebp,[eax*2+ebx]	; doubled word plus the bit carried from the previous word
	shr eax,31	; bit shifted out by the doubling
	cmp ecx,DWORD [esp]
	mov ebx,eax
	mov DWORD [28+ecx*4+esp],ebp
	jl NEAR L$012sqr
	; Last cross product of round 0, then set up the np*factor pass.
	mov eax,DWORD [ecx*4+esi]
	mov ebp,edx
	mul edi
	add eax,ebp
	mov edi,DWORD [20+esp]	; n0
	adc edx,0
	mov esi,DWORD [16+esp]	; esi = np
	lea ebp,[eax*2+ebx]
	imul edi,DWORD [32+esp]	; factor = n0 * tp[0]
	shr eax,31
	mov DWORD [32+ecx*4+esp],ebp	; tp[num-1]
	lea ebp,[edx*2+eax]
	mov eax,DWORD [esi]	; np[0]
	shr edx,31
	mov DWORD [36+ecx*4+esp],ebp	; tp[num]
	mov DWORD [40+ecx*4+esp],edx	; tp[num+1]
	mul edi
	add eax,DWORD [32+esp]	; only the carry out is used
	mov ebx,ecx	; ebx = num-1 (mov keeps the carry for the adc below)
	adc edx,0
	mov eax,DWORD [4+esi]	; np[1]
	mov ecx,1
align 16
; Same np*factor pass as L$0102ndmadd, but handling two words per iteration.
L$0133rdmadd:
	mov ebp,edx
	mul edi
	add ebp,DWORD [32+ecx*4+esp]
	adc edx,0
	add ebp,eax
	mov eax,DWORD [4+ecx*4+esi]
	adc edx,0
	mov DWORD [28+ecx*4+esp],ebp
	mov ebp,edx
	mul edi
	add ebp,DWORD [36+ecx*4+esp]
	lea ecx,[2+ecx]
	adc edx,0
	add ebp,eax
	mov eax,DWORD [ecx*4+esi]
	adc edx,0
	cmp ecx,ebx
	mov DWORD [24+ecx*4+esp],ebp
	jl NEAR L$0133rdmadd
	mov ebp,edx
	mul edi
	add ebp,DWORD [32+ebx*4+esp]
	adc edx,0
	add ebp,eax
	adc edx,0
	mov DWORD [28+ebx*4+esp],ebp	; tp[num-2]
	mov ecx,DWORD [12+esp]	; ecx = outer index i
	xor eax,eax
	mov esi,DWORD [8+esp]	; esi = ap
	add edx,DWORD [36+ebx*4+esp]
	adc eax,DWORD [40+ebx*4+esp]
	mov DWORD [32+ebx*4+esp],edx	; tp[num-1]
	cmp ecx,ebx
	mov DWORD [36+ebx*4+esp],eax	; tp[num]
	je NEAR L$007common_tail	; i == num-1: all rounds done
	; Next round: add ap[i]^2 at tp[i], then the doubled cross products above it.
	mov edi,DWORD [4+ecx*4+esi]	; edi = ap[i+1]
	lea ecx,[1+ecx]	; i = i+1
	mov eax,edi
	mov DWORD [12+esp],ecx
	mul edi
	add eax,DWORD [32+ecx*4+esp]
	adc edx,0
	mov DWORD [32+ecx*4+esp],eax
	xor ebp,ebp
	cmp ecx,ebx
	lea ecx,[1+ecx]	; lea keeps the flags from cmp for the je
	je NEAR L$014sqrlast	; i == num-1: no cross products left
	mov ebx,edx
	shr edx,1
	and ebx,1
align 16
L$015sqradd:
	mov eax,DWORD [ecx*4+esi]
	mov ebp,edx
	mul edi
	add eax,ebp
	lea ebp,[eax*1+eax]	; doubled word; lea keeps the carry for the adc below
	adc edx,0
	shr eax,31
	add ebp,DWORD [32+ecx*4+esp]
	lea ecx,[1+ecx]
	adc eax,0
	add ebp,ebx
	adc eax,0
	cmp ecx,DWORD [esp]
	mov DWORD [28+ecx*4+esp],ebp
	mov ebx,eax
	jle NEAR L$015sqradd
	; Double the remaining halved carry and fold in the pending bit.
	mov ebp,edx
	add edx,edx
	shr ebp,31
	add edx,ebx
	adc ebp,0
L$014sqrlast:
	mov edi,DWORD [20+esp]	; n0
	mov esi,DWORD [16+esp]	; esi = np
	imul edi,DWORD [32+esp]	; factor = n0 * tp[0]
	add edx,DWORD [32+ecx*4+esp]
	mov eax,DWORD [esi]	; np[0]
	adc ebp,0
	mov DWORD [32+ecx*4+esp],edx
	mov DWORD [36+ecx*4+esp],ebp
	mul edi
	add eax,DWORD [32+esp]	; only the carry out is used
	lea ebx,[ecx-1]	; lea keeps the carry for the adc below
	adc edx,0
	mov ecx,1
	mov eax,DWORD [4+esi]	; np[1]
	jmp NEAR L$0133rdmadd
align 16
; ---- Final reduction, shared by all paths (ebx = num-1 on entry) ----
L$007common_tail:
	mov ebp,DWORD [16+esp]	; ebp = np
	mov edi,DWORD [4+esp]	; edi = rp
	lea esi,[32+esp]	; esi = tp
	mov eax,DWORD [esi]
	mov ecx,ebx
	xor edx,edx	; index = 0; also clears the carry flag before the first sbb
align 16
; rp[k] = tp[k] - np[k] - borrow for k = 0..num-1. dec, mov and lea do not
; change the carry flag, so the borrow survives from one sbb to the next.
L$016sub:
	sbb eax,DWORD [edx*4+ebp]
	mov DWORD [edx*4+edi],eax
	dec ecx
	mov eax,DWORD [4+edx*4+esi]
	lea edx,[1+edx]
	jge NEAR L$016sub
	sbb eax,0	; eax = tp[num] minus the final borrow
	mov edx,-1
	xor edx,eax	; edx = bitwise complement of eax
	jmp NEAR L$017copy
align 16
; For each word, from num-1 down to 0, pick tp[k] (masked by eax) or the
; difference already in rp[k] (masked by edx) without branching on the mask,
; and overwrite tp[k] with ecx (which holds -1 after L$016sub).
; NOTE(review): this relies on eax being 0 or all-ones here -- confirm.
L$017copy:
	mov esi,DWORD [32+ebx*4+esp]
	mov ebp,DWORD [ebx*4+edi]
	mov DWORD [32+ebx*4+esp],ecx
	and esi,eax
	and ebp,edx
	or ebp,esi
	mov DWORD [ebx*4+edi],ebp
	dec ebx
	jge NEAR L$017copy
	mov esp,DWORD [24+esp]	; drop the frame: back to the esp saved before allocation
	mov eax,1	; success
L$000just_leave:
	pop edi
	pop esi
	pop ebx
	pop ebp
	ret
; NUL-terminated ASCII identification string:
; "Montgomery Multiplication for x86, CRYPTOGAMS by" followed by the author's
; e-mail address in angle brackets.
db 77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
db 112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56
db 54,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121
db 32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46
db 111,114,103,62,0
; 16-byte common symbol holding the capability words tested above.
segment .bss
common _OPENSSL_ia32cap_P 16