; Montgomery multiplication with scatter/gather, x86-64, NASM syntax, Win64 ABI.
;
; Every exported routine below starts with a "WIN64 prologue": it parks the
; caller's rdi/rsi in the shadow space at [8+rsp]/[16+rsp], then copies the
; Win64 argument registers rcx,rdx,r8,r9 and stack slots [40+rsp],[48+rsp]
; into rdi,rsi,rdx,rcx,r8,r9.  The matching epilogue reloads rdi/rsi.
; NOTE(review): argument meaning is presumably OpenSSL's
; (rp, ap, table, np, n0, num, power) - confirm against the C prototypes.
;
; Raw "DB 102,72|73,15,110|126,..." sequences are hand-encoded MOVQ
; instructions between a general-purpose register and an XMM register; each
; is decoded in a trailing comment.  Lone "DB 0x66"/"DB 0x67" bytes are
; instruction prefixes emitted by the generator.
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont_gather5

; bn_mul_mont_gather5: dispatches on the low three bits of the 6th argument
; (r9d after the prologue): non-zero -> one-word-at-a-time path at
; $L$mul_enter, zero -> 4x-unrolled path at $L$mul4x_enter.
; Returns rax = 1.
ALIGN	64
bn_mul_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	test	r9d,7
	jnz	NEAR $L$mul_enter
	jmp	NEAR $L$mul4x_enter

ALIGN	16
$L$mul_enter:
	mov	r9d,r9d
	mov	rax,rsp
	mov	r10d,DWORD[56+rsp]	; 7th argument
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6	; saved at -88 from the entry rsp
	movaps	XMMWORD[16+rsp],xmm7	; saved at -72 from the entry rsp
	lea	r11,[2+r9]
	neg	r11
	lea	rsp,[r11*8+rsp]		; reserve r9+2 qwords of scratch
	and	rsp,-1024

	mov	QWORD[8+r9*8+rsp],rax	; remember the entry rsp
$L$mul_body:
	mov	r12,rdx
	mov	r11,r10
	shr	r10,3
	and	r11,7
	not	r10
	lea	rax,[$L$magic_masks]
	and	r10,3
	lea	r12,[96+r11*8+r12]
	movq	xmm4,QWORD[r10*8+rax]	; xmm4..xmm7 = four select masks
	movq	xmm5,QWORD[8+r10*8+rax]
	movq	xmm6,QWORD[16+r10*8+rax]
	movq	xmm7,QWORD[24+r10*8+rax]

	; masked gather: four loads are always issued, masks pick one of them
	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
	por	xmm0,xmm2
	lea	r12,[256+r12]
	por	xmm0,xmm3

DB	102,72,15,126,195		; movq rbx,xmm0

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]

	xor	r14,r14
	xor	r15,r15

	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7

	imul	rbp,r10
	mov	r11,rdx

	por	xmm0,xmm2
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st

DB	102,72,15,126,195		; movq rbx,xmm0

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]

	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7

	imul	rbp,r10
	mov	r11,rdx

	por	xmm0,xmm2
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

DB	102,72,15,126,195		; movq rbx,xmm0

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

	xor	r14,r14			; also clears CF for the sbb chain
	mov	rax,QWORD[rsp]
	lea	rsi,[rsp]
	mov	r15,r9
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsi]
	lea	r14,[1+r14]		; lea/dec leave CF untouched
	dec	r15
	jnz	NEAR $L$sub

	sbb	rax,0			; rax = select mask from the final borrow
	xor	r14,r14
	mov	r15,r9
ALIGN	16
$L$copy:
	; branch-free select between scratch and output word, masked by rax
	mov	rsi,QWORD[r14*8+rsp]
	mov	rcx,QWORD[r14*8+rdi]
	xor	rsi,rcx
	and	rsi,rax
	xor	rsi,rcx
	mov	QWORD[r14*8+rsp],r14	; overwrite the scratch word
	mov	QWORD[r14*8+rdi],rsi
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	mov	rsi,QWORD[8+r9*8+rsp]	; entry rsp saved before $L$mul_body
	mov	rax,1
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul_mont_gather5:

; bn_mul4x_mont_gather5: 4x-unrolled path.  Builds a 64-byte-aligned stack
; frame, stores the entry rsp at [40+rsp], and runs mul4x_internal.
; Also entered at $L$mul4x_enter from bn_mul_mont_gather5.  Returns rax = 1.
ALIGN	32
bn_mul4x_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


$L$mul4x_enter:
DB	0x67
	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
DB	0x67
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9

	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$mul4xsp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$mul4xsp_done

ALIGN	32
$L$mul4xsp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor) keeps CF for the cmovc
	cmovc	r11,r10
	sub	rsp,r11
$L$mul4xsp_done:
	and	rsp,-64
	neg	r9

	mov	QWORD[40+rsp],rax
$L$mul4x_body:

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]
	mov	rax,1
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul4x_mont_gather5:


; mul4x_internal: file-local worker shared by bn_mul4x_mont_gather5 and
; bn_power5.  On entry rax holds the caller's entry rsp (the 7th argument is
; read from [56+rax]).  Overwrites xmm4..xmm7 with select masks, so Win64
; callers must restore xmm6/xmm7 themselves.  Leaves through $L$sqr4x_sub,
; whose ret returns to this routine's caller.
ALIGN	32
mul4x_internal:
	shl	r9,5
	mov	r10d,DWORD[56+rax]
	lea	r13,[256+r9*1+rdx]
	shr	r9,5
	mov	r11,r10
	shr	r10,3
	and	r11,7
	not	r10
	lea	rax,[$L$magic_masks]
	and	r10,3
	lea	r12,[96+r11*8+rdx]
	movq	xmm4,QWORD[r10*8+rax]
	movq	xmm5,QWORD[8+r10*8+rax]
	add	r11,7
	movq	xmm6,QWORD[16+r10*8+rax]
	movq	xmm7,QWORD[24+r10*8+rax]
	and	r11,7

	movq	xmm0,QWORD[(((-96)))+r12]
	lea	r14,[256+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
DB	0x67
	por	xmm0,xmm1
	movq	xmm1,QWORD[((-96))+r14]
DB	0x67
	pand	xmm3,xmm7
DB	0x67
	por	xmm0,xmm2
	movq	xmm2,QWORD[((-32))+r14]
DB	0x67
	pand	xmm1,xmm4
DB	0x67
	por	xmm0,xmm3
	movq	xmm3,QWORD[32+r14]

DB	102,72,15,126,195		; movq rbx,xmm0
	movq	xmm0,QWORD[96+r14]
	mov	QWORD[((16+8))+rsp],r13	; loop bound compared against r12 below
	mov	QWORD[((56+8))+rsp],rdi	; output pointer, reloaded before exit

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]
	lea	rsi,[r9*1+rsi]
	neg	r9

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	pand	xmm2,xmm5
	pand	xmm3,xmm6
	por	xmm1,xmm2

	imul	rbp,r10

	lea	r14,[((64+8))+r11*8+rsp]
	mov	r11,rdx

	pand	xmm0,xmm7
	por	xmm1,xmm3
	lea	r12,[512+r12]
	por	xmm0,xmm1

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x

ALIGN	32
$L$1st4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$1st4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

DB	102,72,15,126,195		; movq rbx,xmm0
	lea	rcx,[r9*2+rcx]

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	jmp	NEAR $L$outer4x

ALIGN	32
$L$outer4x:
	mov	r10,QWORD[r9*1+r14]
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]

	imul	rbp,r10
DB	0x67
	mov	r11,rdx
	mov	QWORD[r14],rdi

	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
	por	xmm0,xmm2
	lea	r14,[r9*1+r14]
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	r13,rdx
	jmp	NEAR $L$inner4x

ALIGN	32
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	add	r10,QWORD[r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,rbp
	mov	rbp,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

DB	102,72,15,126,195		; movq rbx,xmm0
	mov	QWORD[((-16))+r14],rdi
	lea	rcx,[r9*2+rcx]

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r14]
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	cmp	r12,QWORD[((16+8))+rsp]
	jb	NEAR $L$outer4x
	sub	rbp,r13
	adc	r15,r15
	or	rdi,r15
	xor	rdi,1
	lea	rbx,[r9*1+r14]
	lea	rbp,[rdi*8+rcx]
	mov	rcx,r9
	sar	rcx,3+2
	mov	rdi,QWORD[((56+8))+rsp]
	jmp	NEAR $L$sqr4x_sub	; shared tail; its ret returns to our caller

global	bn_power5

; bn_power5: five calls to __bn_sqr8x_internal followed by one
; mul4x_internal, on a 64-byte-aligned stack frame.  Returns rax = 1.
ALIGN	32
bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6	; saved at -88 from the entry rsp
	movaps	XMMWORD[16+rsp],xmm7	; saved at -72 from the entry rsp
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9
	mov	r8,QWORD[r8]

	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor) keeps CF for the cmovc
	cmovc	r11,r10
	sub	rsp,r11
$L$pwr_sp_done:
	and	rsp,-64
	mov	r10,r9
	neg	r9

	mov	QWORD[32+rsp],r8
	mov	QWORD[40+rsp],rax
$L$power5_body:
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	102,73,15,110,218		; movq xmm3,r10
DB	102,72,15,110,226		; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal

DB	102,72,15,126,209		; movq rcx,xmm2
DB	102,72,15,126,226		; movq rdx,xmm4
	mov	rdi,rsi
	mov	rax,QWORD[40+rsp]
	lea	r8,[32+rsp]

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]
	mov	rax,1
	; FIX: mul4x_internal loads masks into xmm6/xmm7, which are
	; callee-saved on Win64.  Restore them from the prologue's save slots,
	; exactly as the bn_mul4x_mont_gather5 epilogue does.
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_power5:

global	bn_sqr8x_internal

; bn_sqr8x_internal / __bn_sqr8x_internal: squaring worker called from
; bn_power5.  Expects r9 and r10 as set up by that caller (r10 = -r9) and
; xmm1..xmm3 holding the values stashed at $L$power5_body.  Falls through
; into sqr8x_reduction and exits through $L$sqr4x_sub.
ALIGN	32
bn_sqr8x_internal:
__bn_sqr8x_internal:

	lea	rbp,[32+r10]
	lea	rsi,[r9*1+rsi]

	mov	rcx,r9


	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	mov	QWORD[((-24))+rbp*1+rdi],r10

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-16))+rbp*1+rdi],r11
	mov	r10,rdx


	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	mov	r12,rax
	mov	rax,rbx
	mov	r13,rdx

	lea	rcx,[rbp]
	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10
	jmp	NEAR $L$sqr4x_1st

ALIGN	32
$L$sqr4x_1st:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	rbx,QWORD[16+rcx*1+rsi]
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0

	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	QWORD[8+rcx*1+rdi],r10
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[24+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[16+rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0
	lea	rcx,[32+rcx]

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_1st

	mul	r15
	add	r13,rax
	lea	rbp,[16+rbp]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx
	jmp	NEAR $L$sqr4x_outer

ALIGN	32
$L$sqr4x_outer:
	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,QWORD[((-24))+rbp*1+rdi]
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-24))+rbp*1+rdi],r10
	mov	r11,rdx

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	add	r11,QWORD[((-16))+rbp*1+rdi]
	mov	r10,rdx
	adc	r10,0
	mov	QWORD[((-16))+rbp*1+rdi],r11

	xor	r12,r12

	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	add	r12,rax
	mov	rax,rbx
	adc	rdx,0
	add	r12,QWORD[((-8))+rbp*1+rdi]
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rbp*1+rdi],r10

	lea	rcx,[rbp]
	jmp	NEAR $L$sqr4x_inner

ALIGN	32
$L$sqr4x_inner:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0
	add	r13,QWORD[rcx*1+rdi]
	adc	r12,0

DB	0x67
	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	QWORD[rcx*1+rdi],r11
	mov	rax,rbx
	mov	r13,rdx
	adc	r13,0
	add	r12,QWORD[8+rcx*1+rdi]
	lea	rcx,[16+rcx]
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_inner

DB	0x67
	mul	r15
	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	add	rbp,16
	jnz	NEAR $L$sqr4x_outer


	mov	r14,QWORD[((-32))+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rsi]
	mov	r15,rax

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	QWORD[((-24))+rdi],r10
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	mov	rbx,QWORD[((-8))+rsi]
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[((-16))+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rdi],r10

	mul	r15
	add	r13,rax
	mov	rax,QWORD[((-16))+rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	mul	rbx
	add	rbp,16
	xor	r14,r14
	sub	rbp,r9
	xor	r15,r15

	add	rax,r12
	adc	rdx,0
	mov	QWORD[8+rdi],rax
	mov	QWORD[16+rdi],rdx
	mov	QWORD[24+rdi],r15

	mov	rax,QWORD[((-16))+rbp*1+rsi]
	lea	rdi,[((48+8))+rsp]
	xor	r10,r10
	mov	r11,QWORD[8+rdi]

	; double each stored word (lea x*2 / shr 63 carries the top bit to the
	; next word) and add the square of the matching input word
	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15			; CF = carry kept in r15 by the sbb below
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	lea	rbp,[16+rbp]
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	jmp	NEAR $L$sqr4x_shift_n_add

ALIGN	32
$L$sqr4x_shift_n_add:
	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[8+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[((-16))+rdi],rbx
	adc	r8,rdx

	lea	r12,[r10*2+r14]
	mov	QWORD[((-8))+rdi],r8
	sbb	r15,r15
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[8+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[16+rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	add	rbp,32
	jnz	NEAR $L$sqr4x_shift_n_add

	lea	r12,[r10*2+r14]
DB	0x67
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mul	rax
	neg	r15
	adc	rbx,rax
	adc	r8,rdx
	mov	QWORD[((-16))+rdi],rbx
	mov	QWORD[((-8))+rdi],r8
DB	102,72,15,126,213		; movq rbp,xmm2
; sqr8x_reduction: reduction stage.  Reached by fall-through from the
; squaring code above and by a direct call from bn_from_mont8x (which puts
; the same pointer in rbp itself).  Reads the multiplier from [32+8+rsp],
; i.e. the caller's [32+rsp] slot seen across the return address.
sqr8x_reduction:
	xor	rax,rax
	lea	rcx,[r9*2+rbp]
	lea	rdx,[((48+8))+r9*2+rsp]
	mov	QWORD[((0+8))+rsp],rcx
	lea	rdi,[((48+8))+r9*1+rsp]
	mov	QWORD[((8+8))+rsp],rdx
	neg	r9
	jmp	NEAR $L$8x_reduction_loop

ALIGN	32
$L$8x_reduction_loop:
	lea	rdi,[r9*1+rdi]
DB	0x66
	mov	rbx,QWORD[rdi]
	mov	r9,QWORD[8+rdi]
	mov	r10,QWORD[16+rdi]
	mov	r11,QWORD[24+rdi]
	mov	r12,QWORD[32+rdi]
	mov	r13,QWORD[40+rdi]
	mov	r14,QWORD[48+rdi]
	mov	r15,QWORD[56+rdi]
	mov	QWORD[rdx],rax
	lea	rdi,[64+rdi]

DB	0x67
	mov	r8,rbx
	imul	rbx,QWORD[((32+8))+rsp]
	mov	rax,QWORD[rbp]
	mov	ecx,8
	jmp	NEAR $L$8x_reduce

ALIGN	32
$L$8x_reduce:
	mul	rbx
	mov	rax,QWORD[16+rbp]
	neg	r8
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	add	r8,r9
	mov	QWORD[((48-8+8))+rcx*8+rsp],rbx
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r9,r10
	mov	rsi,QWORD[((32+8))+rsp]
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[64+rbp]
	adc	rdx,0
	imul	rsi,r8
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[80+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[96+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[112+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,rsi
	add	r15,rax
	mov	rax,QWORD[rbp]
	adc	rdx,0
	add	r14,r15
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_reduce

	lea	rbp,[128+rbp]
	xor	rax,rax
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_no_tail

DB	0x66
	add	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi			; keep the carry as 0 / -1 in rsi

	mov	rbx,QWORD[((48+56+8))+rsp]
	mov	ecx,8
	mov	rax,QWORD[rbp]
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail:
	mul	rbx
	add	r8,rax
	mov	rax,QWORD[16+rbp]
	mov	QWORD[rdi],r8
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	add	r8,r9
	lea	rdi,[8+rdi]
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r9,r10
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[64+rbp]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[80+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[96+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[112+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,QWORD[((48-16+8))+rcx*8+rsp]
	add	r15,rax
	adc	rdx,0
	add	r14,r15
	mov	rax,QWORD[rbp]
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_tail

	lea	rbp,[128+rbp]
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_tail_done

	mov	rbx,QWORD[((48+56+8))+rsp]
	neg	rsi			; turn the saved 0 / -1 back into CF
	mov	rax,QWORD[rbp]
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi

	mov	ecx,8
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail_done:
	add	r8,QWORD[rdx]
	xor	rax,rax

	neg	rsi
$L$8x_no_tail:
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	adc	rax,0
	mov	rcx,QWORD[((-16))+rbp]
	xor	rsi,rsi

DB	102,72,15,126,213		; movq rbp,xmm2

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
DB	102,73,15,126,217		; movq r9,xmm3
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11
	mov	QWORD[32+rdi],r12
	mov	QWORD[40+rdi],r13
	mov	QWORD[48+rdi],r14
	mov	QWORD[56+rdi],r15
	lea	rdi,[64+rdi]

	cmp	rdi,rdx
	jb	NEAR $L$8x_reduction_loop

	sub	rcx,r15
	lea	rbx,[r9*1+rdi]
	adc	rsi,rsi
	mov	rcx,r9
	or	rax,rsi
DB	102,72,15,126,207		; movq rdi,xmm1
	xor	rax,1
DB	102,72,15,126,206		; movq rsi,xmm1
	lea	rbp,[rax*8+rbp]
	sar	rcx,3+2
	jmp	NEAR $L$sqr4x_sub

; $L$sqr4x_sub: shared final subtraction, four words per iteration.
; rbx = source words, rbp = subtrahend (read at a 16-byte stride),
; rdi = destination, rcx = negative iteration count.
; On exit r10 = r9 as it was on entry, and r9 is negated.
ALIGN	32
$L$sqr4x_sub:
DB	0x66
	mov	r12,QWORD[rbx]
	mov	r13,QWORD[8+rbx]
	sbb	r12,QWORD[rbp]
	mov	r14,QWORD[16+rbx]
	sbb	r13,QWORD[16+rbp]
	mov	r15,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r14,QWORD[32+rbp]
	mov	QWORD[rdi],r12
	sbb	r15,QWORD[48+rbp]
	lea	rbp,[64+rbp]
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	lea	rdi,[32+rdi]

	inc	rcx			; inc leaves CF intact for the sbb chain
	jnz	NEAR $L$sqr4x_sub
	mov	r10,r9
	neg	r9
	DB	0F3h,0C3h		;repret

global	bn_from_montgomery

; bn_from_montgomery: if the low three bits of the 6th argument ([48+rsp])
; are zero, tail-jumps to bn_from_mont8x; otherwise returns eax = 0.
ALIGN	32
bn_from_montgomery:
	test	DWORD[48+rsp],7
	jz	NEAR bn_from_mont8x
	xor	eax,eax
	DB	0F3h,0C3h		;repret



; bn_from_mont8x: copies the input into a zero-extended stack buffer
; ($L$mul_by_1), calls sqr8x_reduction, then wipes the stack buffer.
; Returns rax = 1.  xmm6/xmm7 are saved in the prologue but not written on
; any path visible here, so the epilogue does not reload them.
ALIGN	32
bn_from_mont8x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_from_mont8x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


DB	0x67
	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
DB	0x67
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9
	mov	r8,QWORD[r8]

	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$from_sp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$from_sp_done

ALIGN	32
$L$from_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor) keeps CF for the cmovc
	cmovc	r11,r10
	sub	rsp,r11
$L$from_sp_done:
	and	rsp,-64
	mov	r10,r9
	neg	r9

	mov	QWORD[32+rsp],r8
	mov	QWORD[40+rsp],rax
$L$from_body:
	mov	r11,r9
	lea	rax,[48+rsp]
	pxor	xmm0,xmm0
	jmp	NEAR $L$mul_by_1

ALIGN	32
$L$mul_by_1:
	; copy 64 input bytes to [rax] and zero 64 bytes at [rax+r9]
	movdqu	xmm1,XMMWORD[rsi]
	movdqu	xmm2,XMMWORD[16+rsi]
	movdqu	xmm3,XMMWORD[32+rsi]
	movdqa	XMMWORD[r9*1+rax],xmm0
	movdqu	xmm4,XMMWORD[48+rsi]
	movdqa	XMMWORD[16+r9*1+rax],xmm0
DB	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	; lea rsi,[rsi+64] (disp32 form)
	movdqa	XMMWORD[rax],xmm1
	movdqa	XMMWORD[32+r9*1+rax],xmm0
	movdqa	XMMWORD[16+rax],xmm2
	movdqa	XMMWORD[48+r9*1+rax],xmm0
	movdqa	XMMWORD[32+rax],xmm3
	movdqa	XMMWORD[48+rax],xmm4
	lea	rax,[64+rax]
	sub	r11,64
	jnz	NEAR $L$mul_by_1

DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	0x67
	mov	rbp,rcx
DB	102,73,15,110,218		; movq xmm3,r10
	call	sqr8x_reduction

	pxor	xmm0,xmm0
	lea	rax,[48+rsp]
	mov	rsi,QWORD[40+rsp]
	jmp	NEAR $L$from_mont_zero

ALIGN	32
$L$from_mont_zero:
	; clear the stack buffer, 64 bytes per pass
	movdqa	XMMWORD[rax],xmm0
	movdqa	XMMWORD[16+rax],xmm0
	movdqa	XMMWORD[32+rax],xmm0
	movdqa	XMMWORD[48+rax],xmm0
	lea	rax,[64+rax]
	sub	r9,32
	jnz	NEAR $L$from_mont_zero

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$from_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_from_mont8x:
global	bn_scatter5

; bn_scatter5: leaf routine using the Win64 argument registers directly.
; Copies edx qwords from [rcx] to r8 + r9*8, advancing the destination by
; 256 bytes per word.  Does nothing when edx is 0.
ALIGN	16
bn_scatter5:
	cmp	edx,0
	jz	NEAR $L$scatter_epilogue
	lea	r8,[r9*8+r8]
$L$scatter:
	mov	rax,QWORD[rcx]
	lea	rcx,[8+rcx]
	mov	QWORD[r8],rax
	lea	r8,[256+r8]
	sub	edx,1
	jnz	NEAR $L$scatter
$L$scatter_epilogue:
	DB	0F3h,0C3h		;repret


global	bn_gather5

; bn_gather5: writes edx qwords to [rcx], each one selected from the table
; at r8 by index r9d using masked loads (four loads per word, masks from
; $L$magic_masks).  Saves and restores xmm6/xmm7 on its own 40-byte frame.
; The loop tests the count only after the first store, so edx must be >= 1.
ALIGN	16
bn_gather5:
$L$SEH_begin_bn_gather5:

DB	0x48,0x83,0xec,0x28		; sub rsp,0x28
DB	0x0f,0x29,0x34,0x24		; movaps [rsp],xmm6
DB	0x0f,0x29,0x7c,0x24,0x10	; movaps [rsp+0x10],xmm7
	mov	r11d,r9d
	shr	r9d,3
	and	r11,7
	not	r9d
	lea	rax,[$L$magic_masks]
	and	r9d,3
	lea	r8,[128+r11*8+r8]
	movq	xmm4,QWORD[r9*8+rax]
	movq	xmm5,QWORD[8+r9*8+rax]
	movq	xmm6,QWORD[16+r9*8+rax]
	movq	xmm7,QWORD[24+r9*8+rax]
	jmp	NEAR $L$gather
ALIGN	16
$L$gather:
	movq	xmm0,QWORD[(((-128)))+r8]
	movq	xmm1,QWORD[((-64))+r8]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[r8]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[64+r8]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
DB	0x67,0x67
	por	xmm0,xmm2
	lea	r8,[256+r8]
	por	xmm0,xmm3

	movq	QWORD[rcx],xmm0
	lea	rcx,[8+rcx]
	sub	edx,1
	jnz	NEAR $L$gather
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	lea	rsp,[40+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_gather5:

; Mask table: a single all-ones qword preceded by three zero qwords, so a
; window of four consecutive qwords starting 0..3 qwords in yields exactly
; one non-zero select mask.
ALIGN	64
$L$magic_masks:
	DD	0,0,0,0,0,0,-1,-1
	DD	0,0,0,0,0,0,0,0
; ASCII: "Montgomery Multiplication with scatter/gather for x86_64,
; CRYPTOGAMS by <appro@openssl.org>", NUL-terminated
DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
DB	112,101,110,115,115,108,46,111,114,103,62,0
EXTERN
__imp_RtlVirtualUnwind 1906 1907ALIGN 16 1908mul_handler: 1909 push rsi 1910 push rdi 1911 push rbx 1912 push rbp 1913 push r12 1914 push r13 1915 push r14 1916 push r15 1917 pushfq 1918 sub rsp,64 1919 1920 mov rax,QWORD[120+r8] 1921 mov rbx,QWORD[248+r8] 1922 1923 mov rsi,QWORD[8+r9] 1924 mov r11,QWORD[56+r9] 1925 1926 mov r10d,DWORD[r11] 1927 lea r10,[r10*1+rsi] 1928 cmp rbx,r10 1929 jb NEAR $L$common_seh_tail 1930 1931 mov rax,QWORD[152+r8] 1932 1933 mov r10d,DWORD[4+r11] 1934 lea r10,[r10*1+rsi] 1935 cmp rbx,r10 1936 jae NEAR $L$common_seh_tail 1937 1938 lea r10,[$L$mul_epilogue] 1939 cmp rbx,r10 1940 jb NEAR $L$body_40 1941 1942 mov r10,QWORD[192+r8] 1943 mov rax,QWORD[8+r10*8+rax] 1944 jmp NEAR $L$body_proceed 1945 1946$L$body_40: 1947 mov rax,QWORD[40+rax] 1948$L$body_proceed: 1949 1950 movaps xmm0,XMMWORD[((-88))+rax] 1951 movaps xmm1,XMMWORD[((-72))+rax] 1952 1953 mov rbx,QWORD[((-8))+rax] 1954 mov rbp,QWORD[((-16))+rax] 1955 mov r12,QWORD[((-24))+rax] 1956 mov r13,QWORD[((-32))+rax] 1957 mov r14,QWORD[((-40))+rax] 1958 mov r15,QWORD[((-48))+rax] 1959 mov QWORD[144+r8],rbx 1960 mov QWORD[160+r8],rbp 1961 mov QWORD[216+r8],r12 1962 mov QWORD[224+r8],r13 1963 mov QWORD[232+r8],r14 1964 mov QWORD[240+r8],r15 1965 movups XMMWORD[512+r8],xmm0 1966 movups XMMWORD[528+r8],xmm1 1967 1968$L$common_seh_tail: 1969 mov rdi,QWORD[8+rax] 1970 mov rsi,QWORD[16+rax] 1971 mov QWORD[152+r8],rax 1972 mov QWORD[168+r8],rsi 1973 mov QWORD[176+r8],rdi 1974 1975 mov rdi,QWORD[40+r9] 1976 mov rsi,r8 1977 mov ecx,154 1978 DD 0xa548f3fc 1979 1980 mov rsi,r9 1981 xor rcx,rcx 1982 mov rdx,QWORD[8+rsi] 1983 mov r8,QWORD[rsi] 1984 mov r9,QWORD[16+rsi] 1985 mov r10,QWORD[40+rsi] 1986 lea r11,[56+rsi] 1987 lea r12,[24+rsi] 1988 mov QWORD[32+rsp],r10 1989 mov QWORD[40+rsp],r11 1990 mov QWORD[48+rsp],r12 1991 mov QWORD[56+rsp],rcx 1992 call QWORD[__imp_RtlVirtualUnwind] 1993 1994 mov eax,1 1995 add rsp,64 1996 popfq 1997 pop r15 1998 pop r14 1999 pop r13 2000 pop r12 2001 pop rbp 2002 pop 
rbx 2003 pop rdi 2004 pop rsi 2005 DB 0F3h,0C3h ;repret 2006 2007 2008section .pdata rdata align=4 2009ALIGN 4 2010 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase 2011 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase 2012 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase 2013 2014 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase 2015 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase 2016 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase 2017 2018 DD $L$SEH_begin_bn_power5 wrt ..imagebase 2019 DD $L$SEH_end_bn_power5 wrt ..imagebase 2020 DD $L$SEH_info_bn_power5 wrt ..imagebase 2021 2022 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase 2023 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 2024 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 2025 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 2026 DD $L$SEH_end_bn_gather5 wrt ..imagebase 2027 DD $L$SEH_info_bn_gather5 wrt ..imagebase 2028 2029section .xdata rdata align=8 2030ALIGN 8 2031$L$SEH_info_bn_mul_mont_gather5: 2032DB 9,0,0,0 2033 DD mul_handler wrt ..imagebase 2034 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase 2035ALIGN 8 2036$L$SEH_info_bn_mul4x_mont_gather5: 2037DB 9,0,0,0 2038 DD mul_handler wrt ..imagebase 2039 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase 2040ALIGN 8 2041$L$SEH_info_bn_power5: 2042DB 9,0,0,0 2043 DD mul_handler wrt ..imagebase 2044 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase 2045ALIGN 8 2046$L$SEH_info_bn_from_mont8x: 2047DB 9,0,0,0 2048 DD mul_handler wrt ..imagebase 2049 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 2050ALIGN 8 2051$L$SEH_info_bn_gather5: 2052DB 0x01,0x0d,0x05,0x00 2053DB 0x0d,0x78,0x01,0x00 2054DB 0x08,0x68,0x00,0x00 2055DB 0x04,0x42,0x00,0x00 2056ALIGN 8 2057