;=======================================================================
; Montgomery multiplication with scatter/gather for x86_64 (see the
; ASCII identification string stored after $L$magic_masks).
;
; Syntax: NASM, Intel operand order.   ABI: Microsoft x64 (Win64).
;
; Conventions visible throughout this file:
;   * Every exported entry point first spills rdi/rsi into the caller's
;     home area ([8+rsp], [16+rsp]) and then copies the Win64 argument
;     registers rcx, rdx, r8, r9 and the stack arguments [40+rsp],
;     [48+rsp] into rdi, rsi, rdx, rcx, r8, r9.
;   * Framed routines keep the entry rsp in rax, push rbx, rbp, r12-r15
;     and store xmm6/xmm7 at entry_rsp-88 / entry_rsp-72.  mul_handler
;     relies on exactly this layout.
;   * "DB 0F3h,0C3h" is a two-byte "rep ret".
;   * Several SSE2 register moves are emitted as raw DB bytes; each is
;     decoded in a trailing comment.
;   * Stand-alone "DB 0x66" / "DB 0x67" emit a single prefix byte in front
;     of the following instruction (presumably code-alignment padding).
;   * Table layout used by the gather code: word j of table entry i is
;     stored at table + i*8 + j*256 (see bn_scatter5).
;=======================================================================
default	rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section	.text code align=64


EXTERN	OPENSSL_ia32cap_P

global	bn_mul_mont_gather5

;-----------------------------------------------------------------------
; bn_mul_mont_gather5
; After argument remapping:
;   rdi = result vector (written by $L$sub / $L$copy)
;   rsi = first operand, read one qword at a time
;   rdx = base of the scattered table the second operand is gathered from
;   rcx = vector that is multiplied by the per-iteration factor and
;         finally subtracted from the result
;   r8  = pointer to a single qword factor (dereferenced once)
;   r9  = word count (32-bit value, zero-extended)
;   7th argument (DWORD[56+rsp]) = index of the table entry to gather
; Returns rax = 1.
; Word counts that are a multiple of 8 are handed to $L$mul4x_enter.
;-----------------------------------------------------------------------
ALIGN	64
bn_mul_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	test	r9d,7			; low three bits of the word count
	jnz	NEAR $L$mul_enter	; not a multiple of 8: generic path
	jmp	NEAR $L$mul4x_enter	; multiple of 8: 4x-unrolled path

ALIGN	16
$L$mul_enter:
	mov	r9d,r9d			; zero-extend the word count
	mov	rax,rsp			; rax = entry rsp
	mov	r10d,DWORD[56+rsp]	; r10 = table index (7th argument)
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6	; stored at entry_rsp-88
	movaps	XMMWORD[16+rsp],xmm7	; stored at entry_rsp-72
	lea	r11,[2+r9]
	neg	r11
	lea	rsp,[r11*8+rsp]		; reserve r9+2 qwords of scratch
	and	rsp,-1024

	mov	QWORD[8+r9*8+rsp],rax	; remember entry rsp above the scratch
$L$mul_body:
	; Split the index: r11 = index & 7 picks the qword inside a 64-byte
	; line, r10 = ~(index >> 3) & 3 picks where the single all-ones
	; qword of $L$magic_masks lands among xmm4..xmm7.
	mov	r12,rdx
	mov	r11,r10
	shr	r10,3
	and	r11,7
	not	r10
	lea	rax,[$L$magic_masks]
	and	r10,3
	lea	r12,[96+r11*8+r12]
	movq	xmm4,QWORD[r10*8+rax]
	movq	xmm5,QWORD[8+r10*8+rax]
	movq	xmm6,QWORD[16+r10*8+rax]
	movq	xmm7,QWORD[24+r10*8+rax]

	; Gather one word: read four candidates 64 bytes apart, mask, merge.
	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
	por	xmm0,xmm2
	lea	r12,[256+r12]		; next word of the entry is 256 bytes on
	por	xmm0,xmm3

DB	102,72,15,126,195		; movq rbx,xmm0

	mov	r8,QWORD[r8]		; r8 = the qword factor itself
	mov	rax,QWORD[rsi]

	xor	r14,r14			; r14 = outer index
	xor	r15,r15			; r15 = inner index

	; gather of the next word is interleaved with the first multiply
	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7

	imul	rbp,r10			; rbp = low product * [r8]
	mov	r11,rdx

	por	xmm0,xmm2
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$1st_enter

ALIGN	16
$L$1st:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r11
	mov	r11,r10
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$1st_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	lea	r15,[1+r15]
	mov	r10,rdx

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$1st

DB	102,72,15,126,195		; movq rbx,xmm0 (next gathered word)

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx
	mov	r11,r10

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx	; top carry word of the scratch vector

	lea	r14,[1+r14]
	jmp	NEAR $L$outer
ALIGN	16
$L$outer:
	xor	r15,r15
	mov	rbp,r8
	mov	r10,QWORD[rsp]

	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7

	imul	rbp,r10
	mov	r11,rdx

	por	xmm0,xmm2
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+rsi]
	adc	rdx,0
	mov	r10,QWORD[8+rsp]
	mov	r13,rdx

	lea	r15,[1+r15]
	jmp	NEAR $L$inner_enter

ALIGN	16
$L$inner:
	add	r13,rax
	mov	rax,QWORD[r15*8+rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

$L$inner_enter:
	mul	rbx
	add	r11,rax
	mov	rax,QWORD[r15*8+rcx]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0
	lea	r15,[1+r15]

	mul	rbp
	cmp	r15,r9
	jne	NEAR $L$inner

DB	102,72,15,126,195		; movq rbx,xmm0 (next gathered word)

	add	r13,rax
	mov	rax,QWORD[rsi]
	adc	rdx,0
	add	r13,r10
	mov	r10,QWORD[r15*8+rsp]
	adc	rdx,0
	mov	QWORD[((-16))+r15*8+rsp],r13
	mov	r13,rdx

	xor	rdx,rdx
	add	r13,r11
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r9*8+rsp],r13
	mov	QWORD[r9*8+rsp],rdx

	lea	r14,[1+r14]
	cmp	r14,r9
	jb	NEAR $L$outer

	; result = scratch - [rcx]; the xor below also clears CF for the
	; first sbb, and lea/dec inside the loop leave CF untouched.
	xor	r14,r14
	mov	rax,QWORD[rsp]
	lea	rsi,[rsp]
	mov	r15,r9
	jmp	NEAR $L$sub
ALIGN	16
$L$sub:	sbb	rax,QWORD[r14*8+rcx]
	mov	QWORD[r14*8+rdi],rax
	mov	rax,QWORD[8+r14*8+rsi]
	lea	r14,[1+r14]
	dec	r15
	jnz	NEAR $L$sub

	sbb	rax,0			; rax = top scratch word minus final borrow
	xor	r14,r14
	mov	r15,r9
ALIGN	16
$L$copy:
	; rsi = ((scratch ^ result) & rax) ^ result, i.e. a branch-free
	; select between the scratch word and the subtracted word.
	mov	rsi,QWORD[r14*8+rsp]
	mov	rcx,QWORD[r14*8+rdi]
	xor	rsi,rcx
	and	rsi,rax
	xor	rsi,rcx
	mov	QWORD[r14*8+rsp],r14	; overwrite the scratch word with the index
	mov	QWORD[r14*8+rdi],rsi
	lea	r14,[1+r14]
	sub	r15,1
	jnz	NEAR $L$copy

	mov	rsi,QWORD[8+r9*8+rsp]	; rsi = entry rsp
	mov	rax,1
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$mul_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul_mont_gather5:

;-----------------------------------------------------------------------
; bn_mul4x_mont_gather5
; Same argument layout as bn_mul_mont_gather5; also entered at
; $L$mul4x_enter from there.  Builds a 64-byte aligned frame, stores the
; entry rsp at [40+rsp] and lets mul4x_internal do the work.
; Returns rax = 1.
;-----------------------------------------------------------------------
ALIGN	32
bn_mul4x_mont_gather5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_mul4x_mont_gather5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


$L$mul4x_enter:
DB	0x67
	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
DB	0x67
	mov	r10d,r9d
	shl	r9d,3			; r9  = length in bytes
	shl	r10d,3+2		; r10 = 4 * length in bytes
	neg	r9

	; Choose the frame position from the distance (mod 4096) between
	; the candidate frame and the operand at rsi.
	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$mul4xsp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$mul4xsp_done

ALIGN	32
$L$mul4xsp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor): CF from the sub is still needed
	cmovc	r11,r10
	sub	rsp,r11
$L$mul4xsp_done:
	and	rsp,-64
	neg	r9

	mov	QWORD[40+rsp],rax	; entry rsp
$L$mul4x_body:

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]
	mov	rax,1
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$mul4x_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_mul4x_mont_gather5:

;-----------------------------------------------------------------------
; mul4x_internal  (file-local; not a Win64-ABI function)
; In:  rax = entry rsp of the exported caller (table index is read from
;            DWORD[56+rax])
;      rdi = result, rsi = first operand, rdx = table base,
;      rcx = vector with one qword used per 16 bytes,
;      r8  = pointer to the qword factor, r9 = length in bytes
;      rsp = caller's frame plus the 8-byte return address
; Clobbers xmm0-xmm7 (xmm4..xmm7 hold the gather masks), all integer
; scratch registers, and frame slots [16+8+rsp], [56+8+rsp] and the
; scratch vector from [64+8+rsp] upward.
; Leaves through $L$sqr4x_sub, whose ret returns to the caller.
;-----------------------------------------------------------------------
ALIGN	32
mul4x_internal:
	shl	r9,5
	mov	r10d,DWORD[56+rax]	; table index
	lea	r13,[256+r9*1+rdx]	; end marker for the table pointer r12
	shr	r9,5
	mov	r11,r10
	shr	r10,3
	and	r11,7
	not	r10
	lea	rax,[$L$magic_masks]
	and	r10,3
	lea	r12,[96+r11*8+rdx]
	movq	xmm4,QWORD[r10*8+rax]
	movq	xmm5,QWORD[8+r10*8+rax]
	add	r11,7
	movq	xmm6,QWORD[16+r10*8+rax]
	movq	xmm7,QWORD[24+r10*8+rax]
	and	r11,7

	movq	xmm0,QWORD[(((-96)))+r12]
	lea	r14,[256+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]
	pand	xmm2,xmm6
DB	0x67
	por	xmm0,xmm1
	movq	xmm1,QWORD[((-96))+r14]
DB	0x67
	pand	xmm3,xmm7
DB	0x67
	por	xmm0,xmm2
	movq	xmm2,QWORD[((-32))+r14]
DB	0x67
	pand	xmm1,xmm4
DB	0x67
	por	xmm0,xmm3
	movq	xmm3,QWORD[32+r14]

DB	102,72,15,126,195		; movq rbx,xmm0
	movq	xmm0,QWORD[96+r14]
	mov	QWORD[((16+8))+rsp],r13	; save table end marker
	mov	QWORD[((56+8))+rsp],rdi	; save result pointer

	mov	r8,QWORD[r8]
	mov	rax,QWORD[rsi]
	lea	rsi,[r9*1+rsi]		; rsi = end of operand; indexed with negative r9
	neg	r9

	mov	rbp,r8
	mul	rbx
	mov	r10,rax
	mov	rax,QWORD[rcx]

	pand	xmm2,xmm5
	pand	xmm3,xmm6
	por	xmm1,xmm2

	imul	rbp,r10

	lea	r14,[((64+8))+r11*8+rsp]
	mov	r11,rdx

	pand	xmm0,xmm7
	por	xmm1,xmm3
	lea	r12,[512+r12]
	por	xmm0,xmm1

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx
	jmp	NEAR $L$1st4x

ALIGN	32
$L$1st4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[r14],rdi
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$1st4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	r13,rdx

DB	102,72,15,126,195		; movq rbx,xmm0 (next gathered word)
	lea	rcx,[r9*2+rcx]		; rewind rcx (r9 is negative here)

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	jmp	NEAR $L$outer4x

ALIGN	32
$L$outer4x:
	mov	r10,QWORD[r9*1+r14]
	mov	rbp,r8
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0

	movq	xmm0,QWORD[(((-96)))+r12]
	movq	xmm1,QWORD[((-32))+r12]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[32+r12]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[96+r12]

	imul	rbp,r10
DB	0x67
	mov	r11,rdx
	mov	QWORD[r14],rdi

	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
	por	xmm0,xmm2
	lea	r14,[r9*1+r14]
	lea	r12,[256+r12]
	por	xmm0,xmm3

	mul	rbp
	add	r10,rax
	mov	rax,QWORD[8+r9*1+rsi]
	adc	rdx,0
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	r15,[32+r9]
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	r13,rdx
	jmp	NEAR $L$inner4x

ALIGN	32
$L$inner4x:
	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[((-16))+rcx]
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[rcx]
	adc	rdx,0
	add	r10,QWORD[r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[8+r15*1+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-16))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[16+rcx]
	adc	rdx,0
	add	r11,QWORD[8+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[16+r15*1+rsi]
	adc	rdx,0
	add	rdi,r11
	lea	rcx,[64+rcx]
	adc	rdx,0
	mov	QWORD[((-8))+r14],r13
	mov	r13,rdx

	add	r15,32
	jnz	NEAR $L$inner4x

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[((-32))+rcx]
	adc	rdx,0
	add	r10,QWORD[16+r14]
	lea	r14,[32+r14]
	adc	rdx,0
	mov	r11,rdx

	mul	rbp
	add	r13,rax
	mov	rax,QWORD[((-8))+rsi]
	adc	rdx,0
	add	r13,r10
	adc	rdx,0
	mov	QWORD[((-32))+r14],rdi
	mov	rdi,rdx

	mul	rbx
	add	r11,rax
	mov	rax,rbp
	mov	rbp,QWORD[((-16))+rcx]	; rbp now holds the last word read via rcx
	adc	rdx,0
	add	r11,QWORD[((-8))+r14]
	adc	rdx,0
	mov	r10,rdx

	mul	rbp
	add	rdi,rax
	mov	rax,QWORD[r9*1+rsi]
	adc	rdx,0
	add	rdi,r11
	adc	rdx,0
	mov	QWORD[((-24))+r14],r13
	mov	r13,rdx

DB	102,72,15,126,195		; movq rbx,xmm0 (next gathered word)
	mov	QWORD[((-16))+r14],rdi
	lea	rcx,[r9*2+rcx]

	xor	rdi,rdi
	add	r13,r10
	adc	rdi,0
	add	r13,QWORD[r14]
	adc	rdi,0
	mov	QWORD[((-8))+r14],r13

	cmp	r12,QWORD[((16+8))+rsp]	; table pointer reached the end marker?
	jb	NEAR $L$outer4x
	; Compare the top result word with the word kept in rbp and fold in
	; the carry; rdi (0 or 1) then offsets the subtrahend pointer by
	; 0 or 8 bytes before the final subtraction.
	sub	rbp,r13
	adc	r15,r15
	or	rdi,r15
	xor	rdi,1
	lea	rbx,[r9*1+r14]
	lea	rbp,[rdi*8+rcx]
	mov	rcx,r9
	sar	rcx,3+2			; rcx = -(length in bytes)/32: loop counter
	mov	rdi,QWORD[((56+8))+rsp]	; result pointer saved on entry
	jmp	NEAR $L$sqr4x_sub

global	bn_power5

;-----------------------------------------------------------------------
; bn_power5
; Same argument layout as bn_mul_mont_gather5.  Runs
; __bn_sqr8x_internal five times and then mul4x_internal once, using the
; qword factor copied to [32+rsp].
; Returns rax = 1.
;-----------------------------------------------------------------------
ALIGN	32
bn_power5:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_power5:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6	; stored at entry_rsp-88
	movaps	XMMWORD[16+rsp],xmm7	; stored at entry_rsp-72
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9
	mov	r8,QWORD[r8]

	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$pwr_sp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$pwr_sp_done

ALIGN	32
$L$pwr_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor): CF from the sub is still needed
	cmovc	r11,r10
	sub	rsp,r11
$L$pwr_sp_done:
	and	rsp,-64
	mov	r10,r9			; r10 = -(length in bytes)
	neg	r9			; r9  =   length in bytes

	mov	QWORD[32+rsp],r8	; the qword factor
	mov	QWORD[40+rsp],rax	; entry rsp
$L$power5_body:
DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	102,73,15,110,218		; movq xmm3,r10
DB	102,72,15,110,226		; movq xmm4,rdx

	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal
	call	__bn_sqr8x_internal

DB	102,72,15,126,209		; movq rcx,xmm2
DB	102,72,15,126,226		; movq rdx,xmm4
	mov	rdi,rsi
	mov	rax,QWORD[40+rsp]	; mul4x_internal reads the index via rax
	lea	r8,[32+rsp]

	call	mul4x_internal

	mov	rsi,QWORD[40+rsp]
	mov	rax,1
	; xmm6/xmm7 are callee-saved on Win64 and mul4x_internal loaded
	; gather masks into them, so reload the values saved in the
	; prologue (same slots bn_mul4x_mont_gather5 restores from).
	movaps	xmm6,XMMWORD[((-88))+rsi]
	movaps	xmm7,XMMWORD[((-72))+rsi]
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$power5_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_power5:

global	bn_sqr8x_internal

;-----------------------------------------------------------------------
; bn_sqr8x_internal / __bn_sqr8x_internal  (register-argument helper)
; In (as set up by bn_power5):
;   rsi = operand, r9 = length in bytes, r10 = -r9,
;   xmm1 = result pointer, xmm2 = vector used by the reduction
;          (one qword per 16 bytes), xmm3 = -length in bytes,
;   [32+8+rsp] = qword factor (rsp as seen after the call)
; Builds the double-width square in the frame starting at [48+8+rsp],
; then falls into sqr8x_reduction and leaves through $L$sqr4x_sub.
; Does not touch xmm6/xmm7.
;-----------------------------------------------------------------------
ALIGN	32
bn_sqr8x_internal:
__bn_sqr8x_internal:
	lea	rbp,[32+r10]
	lea	rsi,[r9*1+rsi]		; rsi = end of operand; indexed negatively

	mov	rcx,r9


	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	mov	QWORD[((-24))+rbp*1+rdi],r10

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-16))+rbp*1+rdi],r11
	mov	r10,rdx


	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	mov	r12,rax
	mov	rax,rbx
	mov	r13,rdx

	lea	rcx,[rbp]
	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10
	jmp	NEAR $L$sqr4x_1st

ALIGN	32
$L$sqr4x_1st:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	rbx,QWORD[16+rcx*1+rsi]
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0

	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	QWORD[8+rcx*1+rdi],r10
	mov	r12,rdx
	adc	r12,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[24+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0


	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[16+rcx*1+rdi],r11
	mov	r13,rdx
	adc	r13,0
	lea	rcx,[32+rcx]

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_1st

	mul	r15
	add	r13,rax
	lea	rbp,[16+rbp]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx
	jmp	NEAR $L$sqr4x_outer

ALIGN	32
$L$sqr4x_outer:
	mov	r14,QWORD[((-32))+rbp*1+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rbp*1+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rbp*1+rsi]
	mov	r15,rax

	mul	r14
	mov	r10,QWORD[((-24))+rbp*1+rdi]
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	mov	QWORD[((-24))+rbp*1+rdi],r10
	mov	r11,rdx

	mul	r14
	add	r11,rax
	mov	rax,rbx
	adc	rdx,0
	add	r11,QWORD[((-16))+rbp*1+rdi]
	mov	r10,rdx
	adc	r10,0
	mov	QWORD[((-16))+rbp*1+rdi],r11

	xor	r12,r12

	mov	rbx,QWORD[((-8))+rbp*1+rsi]
	mul	r15
	add	r12,rax
	mov	rax,rbx
	adc	rdx,0
	add	r12,QWORD[((-8))+rbp*1+rdi]
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rbp*1+rdi],r10

	lea	rcx,[rbp]
	jmp	NEAR $L$sqr4x_inner

ALIGN	32
$L$sqr4x_inner:
	mov	rbx,QWORD[rcx*1+rsi]
	mul	r15
	add	r13,rax
	mov	rax,rbx
	mov	r12,rdx
	adc	r12,0
	add	r13,QWORD[rcx*1+rdi]
	adc	r12,0

DB	0x67
	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	rbx,QWORD[8+rcx*1+rsi]
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	QWORD[rcx*1+rdi],r11
	mov	rax,rbx
	mov	r13,rdx
	adc	r13,0
	add	r12,QWORD[8+rcx*1+rdi]
	lea	rcx,[16+rcx]
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	adc	rdx,0
	add	r10,r12
	mov	r11,rdx
	adc	r11,0
	mov	QWORD[((-8))+rcx*1+rdi],r10

	cmp	rcx,0
	jne	NEAR $L$sqr4x_inner

DB	0x67
	mul	r15
	add	r13,rax
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	add	rbp,16
	jnz	NEAR $L$sqr4x_outer


	; last pass over the top four operand words (rbp = 0 here)
	mov	r14,QWORD[((-32))+rsi]
	lea	rdi,[((48+8))+r9*2+rsp]
	mov	rax,QWORD[((-24))+rsi]
	lea	rdi,[((-32))+rbp*1+rdi]
	mov	rbx,QWORD[((-16))+rsi]
	mov	r15,rax

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0

	mul	r14
	add	r11,rax
	mov	rax,rbx
	mov	QWORD[((-24))+rdi],r10
	mov	r10,rdx
	adc	r10,0
	add	r11,r13
	mov	rbx,QWORD[((-8))+rsi]
	adc	r10,0

	mul	r15
	add	r12,rax
	mov	rax,rbx
	mov	QWORD[((-16))+rdi],r11
	mov	r13,rdx
	adc	r13,0

	mul	r14
	add	r10,rax
	mov	rax,rbx
	mov	r11,rdx
	adc	r11,0
	add	r10,r12
	adc	r11,0
	mov	QWORD[((-8))+rdi],r10

	mul	r15
	add	r13,rax
	mov	rax,QWORD[((-16))+rsi]
	adc	rdx,0
	add	r13,r11
	adc	rdx,0

	mov	QWORD[rdi],r13
	mov	r12,rdx
	mov	QWORD[8+rdi],rdx

	mul	rbx
	add	rbp,16
	xor	r14,r14
	sub	rbp,r9
	xor	r15,r15

	add	rax,r12
	adc	rdx,0
	mov	QWORD[8+rdi],rax
	mov	QWORD[16+rdi],rdx
	mov	QWORD[24+rdi],r15

	; Shift-and-add pass: every stored word is doubled (lea x*2 with the
	; bit shifted out carried in r14/r10) and the square of one operand
	; word is added; r15 carries the add-carry between steps (neg/sbb).
	mov	rax,QWORD[((-16))+rbp*1+rsi]
	lea	rdi,[((48+8))+rsp]
	xor	r10,r10
	mov	r11,QWORD[8+rdi]

	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	lea	rbp,[16+rbp]
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	jmp	NEAR $L$sqr4x_shift_n_add

ALIGN	32
$L$sqr4x_shift_n_add:
	lea	r12,[r10*2+r14]
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rbp*1+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[8+rdi]
	adc	rbx,rax
	mov	rax,QWORD[rbp*1+rsi]
	mov	QWORD[((-16))+rdi],rbx
	adc	r8,rdx

	lea	r12,[r10*2+r14]
	mov	QWORD[((-8))+rdi],r8
	sbb	r15,r15
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[16+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[24+rdi]
	adc	r12,rax
	mov	rax,QWORD[8+rbp*1+rsi]
	mov	QWORD[rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[8+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mov	r10,QWORD[32+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[40+rdi]
	adc	rbx,rax
	mov	rax,QWORD[16+rbp*1+rsi]
	mov	QWORD[16+rdi],rbx
	adc	r8,rdx
	mov	QWORD[24+rdi],r8
	sbb	r15,r15
	lea	rdi,[64+rdi]
	add	rbp,32
	jnz	NEAR $L$sqr4x_shift_n_add

	lea	r12,[r10*2+r14]
DB	0x67
	shr	r10,63
	lea	r13,[r11*2+rcx]
	shr	r11,63
	or	r13,r10
	mov	r10,QWORD[((-16))+rdi]
	mov	r14,r11
	mul	rax
	neg	r15
	mov	r11,QWORD[((-8))+rdi]
	adc	r12,rax
	mov	rax,QWORD[((-8))+rsi]
	mov	QWORD[((-32))+rdi],r12
	adc	r13,rdx

	lea	rbx,[r10*2+r14]
	mov	QWORD[((-24))+rdi],r13
	sbb	r15,r15
	shr	r10,63
	lea	r8,[r11*2+rcx]
	shr	r11,63
	or	r8,r10
	mul	rax
	neg	r15
	adc	rbx,rax
	adc	r8,rdx
	mov	QWORD[((-16))+rdi],rbx
	mov	QWORD[((-8))+rdi],r8
DB	102,72,15,126,213		; movq rbp,xmm2
;-----------------------------------------------------------------------
; sqr8x_reduction  (also called directly by bn_from_mont8x)
; In:  rbp = vector read one qword per 16 bytes, r9 = length in bytes,
;      double-width input in the frame at [48+8+rsp],
;      [32+8+rsp] = qword factor,
;      xmm1 = result pointer, xmm2 = copy of rbp, xmm3 = -length in bytes
; Processes the frame eight words at a time, then leaves through
; $L$sqr4x_sub.
;-----------------------------------------------------------------------
sqr8x_reduction:
	xor	rax,rax
	lea	rcx,[r9*2+rbp]		; end of the rbp vector
	lea	rdx,[((48+8))+r9*2+rsp]	; end of the frame vector
	mov	QWORD[((0+8))+rsp],rcx
	lea	rdi,[((48+8))+r9*1+rsp]
	mov	QWORD[((8+8))+rsp],rdx
	neg	r9
	jmp	NEAR $L$8x_reduction_loop

ALIGN	32
$L$8x_reduction_loop:
	lea	rdi,[r9*1+rdi]
DB	0x66
	mov	rbx,QWORD[rdi]
	mov	r9,QWORD[8+rdi]
	mov	r10,QWORD[16+rdi]
	mov	r11,QWORD[24+rdi]
	mov	r12,QWORD[32+rdi]
	mov	r13,QWORD[40+rdi]
	mov	r14,QWORD[48+rdi]
	mov	r15,QWORD[56+rdi]
	mov	QWORD[rdx],rax		; park the carry word at the frame end
	lea	rdi,[64+rdi]

DB	0x67
	mov	r8,rbx
	imul	rbx,QWORD[((32+8))+rsp]
	mov	rax,QWORD[rbp]
	mov	ecx,8
	jmp	NEAR $L$8x_reduce

ALIGN	32
$L$8x_reduce:
	mul	rbx
	mov	rax,QWORD[16+rbp]
	neg	r8			; CF = (r8 != 0); the value itself is discarded
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	add	r8,r9
	mov	QWORD[((48-8+8))+rcx*8+rsp],rbx	; keep each multiplier for $L$8x_tail
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r9,r10
	mov	rsi,QWORD[((32+8))+rsp]
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[64+rbp]
	adc	rdx,0
	imul	rsi,r8			; multiplier for the next round
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[80+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[96+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[112+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,rsi
	add	r15,rax
	mov	rax,QWORD[rbp]
	adc	rdx,0
	add	r14,r15
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_reduce

	lea	rbp,[128+rbp]
	xor	rax,rax
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_no_tail

DB	0x66
	add	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi			; rsi = -carry

	mov	rbx,QWORD[((48+56+8))+rsp]	; first saved multiplier
	mov	ecx,8
	mov	rax,QWORD[rbp]
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail:
	mul	rbx
	add	r8,rax
	mov	rax,QWORD[16+rbp]
	mov	QWORD[rdi],r8
	mov	r8,rdx
	adc	r8,0

	mul	rbx
	add	r9,rax
	mov	rax,QWORD[32+rbp]
	adc	rdx,0
	add	r8,r9
	lea	rdi,[8+rdi]
	mov	r9,rdx
	adc	r9,0

	mul	rbx
	add	r10,rax
	mov	rax,QWORD[48+rbp]
	adc	rdx,0
	add	r9,r10
	mov	r10,rdx
	adc	r10,0

	mul	rbx
	add	r11,rax
	mov	rax,QWORD[64+rbp]
	adc	rdx,0
	add	r10,r11
	mov	r11,rdx
	adc	r11,0

	mul	rbx
	add	r12,rax
	mov	rax,QWORD[80+rbp]
	adc	rdx,0
	add	r11,r12
	mov	r12,rdx
	adc	r12,0

	mul	rbx
	add	r13,rax
	mov	rax,QWORD[96+rbp]
	adc	rdx,0
	add	r12,r13
	mov	r13,rdx
	adc	r13,0

	mul	rbx
	add	r14,rax
	mov	rax,QWORD[112+rbp]
	adc	rdx,0
	add	r13,r14
	mov	r14,rdx
	adc	r14,0

	mul	rbx
	mov	rbx,QWORD[((48-16+8))+rcx*8+rsp]	; next saved multiplier
	add	r15,rax
	adc	rdx,0
	add	r14,r15
	mov	rax,QWORD[rbp]
	mov	r15,rdx
	adc	r15,0

	dec	ecx
	jnz	NEAR $L$8x_tail

	lea	rbp,[128+rbp]
	mov	rdx,QWORD[((8+8))+rsp]
	cmp	rbp,QWORD[((0+8))+rsp]
	jae	NEAR $L$8x_tail_done

	mov	rbx,QWORD[((48+56+8))+rsp]
	neg	rsi			; turn the saved -carry back into CF
	mov	rax,QWORD[rbp]
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	sbb	rsi,rsi

	mov	ecx,8
	jmp	NEAR $L$8x_tail

ALIGN	32
$L$8x_tail_done:
	add	r8,QWORD[rdx]		; add the carry word parked at the frame end
	adc	r9,0
	adc	r10,0
	adc	r11,0
	adc	r12,0
	adc	r13,0
	adc	r14,0
	adc	r15,0


	xor	rax,rax

	neg	rsi
$L$8x_no_tail:
	adc	r8,QWORD[rdi]
	adc	r9,QWORD[8+rdi]
	adc	r10,QWORD[16+rdi]
	adc	r11,QWORD[24+rdi]
	adc	r12,QWORD[32+rdi]
	adc	r13,QWORD[40+rdi]
	adc	r14,QWORD[48+rdi]
	adc	r15,QWORD[56+rdi]
	adc	rax,0
	mov	rcx,QWORD[((-16))+rbp]
	xor	rsi,rsi

DB	102,72,15,126,213		; movq rbp,xmm2

	mov	QWORD[rdi],r8
	mov	QWORD[8+rdi],r9
DB	102,73,15,126,217		; movq r9,xmm3
	mov	QWORD[16+rdi],r10
	mov	QWORD[24+rdi],r11
	mov	QWORD[32+rdi],r12
	mov	QWORD[40+rdi],r13
	mov	QWORD[48+rdi],r14
	mov	QWORD[56+rdi],r15
	lea	rdi,[64+rdi]

	cmp	rdi,rdx
	jb	NEAR $L$8x_reduction_loop

	; Same end-game as mul4x_internal: compare the top word with the
	; last word read via rbp, fold in the carry, and offset the
	; subtrahend pointer by 0 or 8 bytes accordingly.
	sub	rcx,r15
	lea	rbx,[r9*1+rdi]
	adc	rsi,rsi
	mov	rcx,r9
	or	rax,rsi
DB	102,72,15,126,207		; movq rdi,xmm1
	xor	rax,1
DB	102,72,15,126,206		; movq rsi,xmm1
	lea	rbp,[rax*8+rbp]
	sar	rcx,3+2			; rcx = -(length in bytes)/32: loop counter
	jmp	NEAR $L$sqr4x_sub

;-----------------------------------------------------------------------
; $L$sqr4x_sub - shared tail of mul4x_internal and sqr8x_reduction.
; In:  rbx = minuend, rbp = subtrahend (one qword per 16 bytes),
;      rdi = destination, rcx = negative count of 4-word groups,
;      r9  = -(length in bytes)
; Stores rbx[] - rbp[] to rdi[] with a borrow chain; lea, mov and inc do
; not modify CF, so CF at entry is the borrow-in of the first sbb.
; Out: r10 = -(length in bytes), r9 = length in bytes.
;-----------------------------------------------------------------------
ALIGN	32
$L$sqr4x_sub:
DB	0x66
	mov	r12,QWORD[rbx]
	mov	r13,QWORD[8+rbx]
	sbb	r12,QWORD[rbp]
	mov	r14,QWORD[16+rbx]
	sbb	r13,QWORD[16+rbp]
	mov	r15,QWORD[24+rbx]
	lea	rbx,[32+rbx]
	sbb	r14,QWORD[32+rbp]
	mov	QWORD[rdi],r12
	sbb	r15,QWORD[48+rbp]
	lea	rbp,[64+rbp]
	mov	QWORD[8+rdi],r13
	mov	QWORD[16+rdi],r14
	mov	QWORD[24+rdi],r15
	lea	rdi,[32+rdi]

	inc	rcx
	jnz	NEAR $L$sqr4x_sub
	mov	r10,r9
	neg	r9
	DB	0F3h,0C3h		;repret

global	bn_from_montgomery

;-----------------------------------------------------------------------
; bn_from_montgomery
; Checks the sixth argument (DWORD[48+rsp], the word count): if it is a
; multiple of 8 control passes to bn_from_mont8x, otherwise the routine
; returns eax = 0 without doing any work.
;-----------------------------------------------------------------------
ALIGN	32
bn_from_montgomery:
	test	DWORD[48+rsp],7
	jz	NEAR bn_from_mont8x
	xor	eax,eax
	DB	0F3h,0C3h		;repret


;-----------------------------------------------------------------------
; bn_from_mont8x
; After argument remapping: rdi = result, rsi = input, rcx = vector
; handed to sqr8x_reduction, r8 = pointer to the qword factor,
; r9 = word count.  Copies the input into the lower half of a
; double-width frame, zeroes the upper half, runs sqr8x_reduction and
; wipes the frame before returning rax = 1.
; xmm6/xmm7 are stored in the frame (mul_handler expects the slots) but
; nothing on this path writes them, so they are not reloaded.
;-----------------------------------------------------------------------
ALIGN	32
bn_from_mont8x:
	mov	QWORD[8+rsp],rdi	;WIN64 prologue
	mov	QWORD[16+rsp],rsi
	mov	rax,rsp
$L$SEH_begin_bn_from_mont8x:
	mov	rdi,rcx
	mov	rsi,rdx
	mov	rdx,r8
	mov	rcx,r9
	mov	r8,QWORD[40+rsp]
	mov	r9,QWORD[48+rsp]


DB	0x67
	mov	rax,rsp
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	lea	rsp,[((-40))+rsp]
	movaps	XMMWORD[rsp],xmm6
	movaps	XMMWORD[16+rsp],xmm7
DB	0x67
	mov	r10d,r9d
	shl	r9d,3
	shl	r10d,3+2
	neg	r9
	mov	r8,QWORD[r8]

	lea	r11,[((-64))+r9*2+rsp]
	sub	r11,rsi
	and	r11,4095
	cmp	r10,r11
	jb	NEAR $L$from_sp_alt
	sub	rsp,r11
	lea	rsp,[((-64))+r9*2+rsp]
	jmp	NEAR $L$from_sp_done

ALIGN	32
$L$from_sp_alt:
	lea	r10,[((4096-64))+r9*2]
	lea	rsp,[((-64))+r9*2+rsp]
	sub	r11,r10
	mov	r10,0			; mov (not xor): CF from the sub is still needed
	cmovc	r11,r10
	sub	rsp,r11
$L$from_sp_done:
	and	rsp,-64
	mov	r10,r9			; r10 = -(length in bytes)
	neg	r9			; r9  =   length in bytes

	mov	QWORD[32+rsp],r8	; the qword factor
	mov	QWORD[40+rsp],rax	; entry rsp
$L$from_body:
	mov	r11,r9
	lea	rax,[48+rsp]
	pxor	xmm0,xmm0
	jmp	NEAR $L$mul_by_1

ALIGN	32
$L$mul_by_1:
	; 64 bytes per round: copy input to [rax], zero [rax+r9]
	movdqu	xmm1,XMMWORD[rsi]
	movdqu	xmm2,XMMWORD[16+rsi]
	movdqu	xmm3,XMMWORD[32+rsi]
	movdqa	XMMWORD[r9*1+rax],xmm0
	movdqu	xmm4,XMMWORD[48+rsi]
	movdqa	XMMWORD[16+r9*1+rax],xmm0
DB	0x48,0x8d,0xb6,0x40,0x00,0x00,0x00	; lea rsi,[64+rsi] (disp32 form)
	movdqa	XMMWORD[rax],xmm1
	movdqa	XMMWORD[32+r9*1+rax],xmm0
	movdqa	XMMWORD[16+rax],xmm2
	movdqa	XMMWORD[48+r9*1+rax],xmm0
	movdqa	XMMWORD[32+rax],xmm3
	movdqa	XMMWORD[48+rax],xmm4
	lea	rax,[64+rax]
	sub	r11,64
	jnz	NEAR $L$mul_by_1

DB	102,72,15,110,207		; movq xmm1,rdi
DB	102,72,15,110,209		; movq xmm2,rcx
DB	0x67
	mov	rbp,rcx
DB	102,73,15,110,218		; movq xmm3,r10
	call	sqr8x_reduction

	pxor	xmm0,xmm0
	lea	rax,[48+rsp]
	mov	rsi,QWORD[40+rsp]
	jmp	NEAR $L$from_mont_zero

ALIGN	32
$L$from_mont_zero:
	; wipe the frame: 64 bytes stored per 32 counted, i.e. 2*r9 bytes
	movdqa	XMMWORD[rax],xmm0
	movdqa	XMMWORD[16+rax],xmm0
	movdqa	XMMWORD[32+rax],xmm0
	movdqa	XMMWORD[48+rax],xmm0
	lea	rax,[64+rax]
	sub	r9,32
	jnz	NEAR $L$from_mont_zero

	mov	rax,1
	mov	r15,QWORD[((-48))+rsi]
	mov	r14,QWORD[((-40))+rsi]
	mov	r13,QWORD[((-32))+rsi]
	mov	r12,QWORD[((-24))+rsi]
	mov	rbp,QWORD[((-16))+rsi]
	mov	rbx,QWORD[((-8))+rsi]
	lea	rsp,[rsi]
$L$from_epilogue:
	mov	rdi,QWORD[8+rsp]	;WIN64 epilogue
	mov	rsi,QWORD[16+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_from_mont8x:
global	bn_scatter5

;-----------------------------------------------------------------------
; bn_scatter5  (leaf, Win64 registers used directly)
; In:  rcx = source words, edx = word count, r8 = table base,
;      r9 = table entry index
; Stores source word j at r8 + r9*8 + j*256.  A count of zero returns
; immediately.
;-----------------------------------------------------------------------
ALIGN	16
bn_scatter5:
	cmp	edx,0
	jz	NEAR $L$scatter_epilogue
	lea	r8,[r9*8+r8]
$L$scatter:
	mov	rax,QWORD[rcx]
	lea	rcx,[8+rcx]
	mov	QWORD[r8],rax
	lea	r8,[256+r8]
	sub	edx,1
	jnz	NEAR $L$scatter
$L$scatter_epilogue:
	DB	0F3h,0C3h		;repret


global	bn_gather5

;-----------------------------------------------------------------------
; bn_gather5  (Win64 registers used directly)
; In:  rcx = destination words, edx = word count (the loop has no
;      zero-count guard), r8 = table base, r9d = table entry index
; For every word reads four candidates 64 bytes apart, masks them with
; xmm4..xmm7 and ORs them together, so the loads do not depend on the
; upper index bits.  xmm6/xmm7 are saved on entry and restored on exit.
;-----------------------------------------------------------------------
ALIGN	16
bn_gather5:
$L$SEH_begin_bn_gather5:

DB	0x48,0x83,0xec,0x28		; sub rsp,0x28
DB	0x0f,0x29,0x34,0x24		; movaps XMMWORD[rsp],xmm6
DB	0x0f,0x29,0x7c,0x24,0x10	; movaps XMMWORD[16+rsp],xmm7
	mov	r11d,r9d
	shr	r9d,3
	and	r11,7
	not	r9d
	lea	rax,[$L$magic_masks]
	and	r9d,3
	lea	r8,[128+r11*8+r8]
	movq	xmm4,QWORD[r9*8+rax]
	movq	xmm5,QWORD[8+r9*8+rax]
	movq	xmm6,QWORD[16+r9*8+rax]
	movq	xmm7,QWORD[24+r9*8+rax]
	jmp	NEAR $L$gather
ALIGN	16
$L$gather:
	movq	xmm0,QWORD[(((-128)))+r8]
	movq	xmm1,QWORD[((-64))+r8]
	pand	xmm0,xmm4
	movq	xmm2,QWORD[r8]
	pand	xmm1,xmm5
	movq	xmm3,QWORD[64+r8]
	pand	xmm2,xmm6
	por	xmm0,xmm1
	pand	xmm3,xmm7
DB	0x67,0x67
	por	xmm0,xmm2
	lea	r8,[256+r8]
	por	xmm0,xmm3

	movq	QWORD[rcx],xmm0
	lea	rcx,[8+rcx]
	sub	edx,1
	jnz	NEAR $L$gather
	movaps	xmm6,XMMWORD[rsp]
	movaps	xmm7,XMMWORD[16+rsp]
	lea	rsp,[40+rsp]
	DB	0F3h,0C3h		;repret
$L$SEH_end_bn_gather5:

; Eight qwords: index 3 is all-ones, every other one is zero.  Loading
; four consecutive qwords starting at index 0..3 therefore yields exactly
; one all-ones mask among xmm4..xmm7.
ALIGN	64
$L$magic_masks:
	DD	0,0,0,0,0,0,-1,-1
	DD	0,0,0,0,0,0,0,0
; "Montgomery Multiplication with scatter/gather for x86_64, CRYPTOGAMS
;  by <appro@openssl.org>", NUL-terminated
DB	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105
DB	112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115
DB	99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111
DB	114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79
DB	71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111
DB	112,101,110,115,115,108,46,111,114,103,62,0
EXTERN	__imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; mul_handler
; Reads two structures: r8 (offsets 120..248 and 512/528 are read or
; written, and 154 qwords are copied out of it) and r9 (offsets 0..56).
; NOTE(review): these offsets look like the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts used by language-specific exception
; handlers - confirm against the unwind data, which is not part of the
; visible text.
; [56+r9] points at two 32-bit offsets, relative to [8+r9], that bracket
; the region in which the full frame exists.  Inside that region the
; saved entry rsp is fetched from the frame and rbx, rbp, r12-r15,
; xmm6 and xmm7 are written back into the r8 record.
;-----------------------------------------------------------------------
ALIGN	16
mul_handler:
	push	rsi
	push	rdi
	push	rbx
	push	rbp
	push	r12
	push	r13
	push	r14
	push	r15
	pushfq
	sub	rsp,64

	mov	rax,QWORD[120+r8]
	mov	rbx,QWORD[248+r8]

	mov	rsi,QWORD[8+r9]
	mov	r11,QWORD[56+r9]

	mov	r10d,DWORD[r11]
	lea	r10,[r10*1+rsi]		; address of the "body" label
	cmp	rbx,r10
	jb	NEAR $L$common_seh_tail	; before the frame was set up

	mov	rax,QWORD[152+r8]

	mov	r10d,DWORD[4+r11]
	lea	r10,[r10*1+rsi]		; address of the "epilogue" label
	cmp	rbx,r10
	jae	NEAR $L$common_seh_tail	; frame already torn down

	lea	r10,[$L$mul_epilogue]
	cmp	rbx,r10
	jb	NEAR $L$body_40

	; code located after $L$mul_epilogue keeps the entry rsp at [40+rsp]
	; (handled at $L$body_40); bn_mul_mont_gather5 keeps it at
	; [8+num*8+rsp], with num taken from [192+r8]
	mov	r10,QWORD[192+r8]
	mov	rax,QWORD[8+r10*8+rax]
	jmp	NEAR $L$body_proceed

$L$body_40:
	mov	rax,QWORD[40+rax]
$L$body_proceed:

	movaps	xmm0,XMMWORD[((-88))+rax]	; saved xmm6
	movaps	xmm1,XMMWORD[((-72))+rax]	; saved xmm7

	mov	rbx,QWORD[((-8))+rax]
	mov	rbp,QWORD[((-16))+rax]
	mov	r12,QWORD[((-24))+rax]
	mov	r13,QWORD[((-32))+rax]
	mov	r14,QWORD[((-40))+rax]
	mov	r15,QWORD[((-48))+rax]
	mov	QWORD[144+r8],rbx
	mov	QWORD[160+r8],rbp
	mov	QWORD[216+r8],r12
	mov	QWORD[224+r8],r13
	mov	QWORD[232+r8],r14
	mov	QWORD[240+r8],r15
	movups	XMMWORD[512+r8],xmm0
	movups	XMMWORD[528+r8],xmm1

$L$common_seh_tail:
	mov	rdi,QWORD[8+rax]	; rdi/rsi as spilled by the WIN64 prologue
	mov	rsi,QWORD[16+rax]
	mov	QWORD[152+r8],rax
	mov	QWORD[168+r8],rsi
	mov	QWORD[176+r8],rdi

	mov	rdi,QWORD[40+r9]
	mov	rsi,r8
	mov	ecx,154
	DD	0xa548f3fc		; bytes FC F3 48 A5: cld ; rep movsq

	mov	rsi,r9
	xor	rcx,rcx
	mov	rdx,QWORD[8+rsi]
	mov	r8,QWORD[rsi]
	mov	r9,QWORD[16+rsi]
	mov	r10,QWORD[40+rsi]
	lea	r11,[56+rsi]
	lea	r12,[24+rsi]
	mov	QWORD[32+rsp],r10
	mov	QWORD[40+rsp],r11
	mov	QWORD[48+rsp],r12
	mov	QWORD[56+rsp],rcx
	call	QWORD[__imp_RtlVirtualUnwind]
	; NOTE(review): the visible text stops at this call; the remainder of
	; the handler is not shown here and has not been reconstructed.
2003 mov eax,1 2004 add rsp,64 2005 popfq 2006 pop r15 2007 pop r14 2008 pop r13 2009 pop r12 2010 pop rbp 2011 pop rbx 2012 pop rdi 2013 pop rsi 2014 DB 0F3h,0C3h ;repret 2015 2016 2017section .pdata rdata align=4 2018ALIGN 4 2019 DD $L$SEH_begin_bn_mul_mont_gather5 wrt ..imagebase 2020 DD $L$SEH_end_bn_mul_mont_gather5 wrt ..imagebase 2021 DD $L$SEH_info_bn_mul_mont_gather5 wrt ..imagebase 2022 2023 DD $L$SEH_begin_bn_mul4x_mont_gather5 wrt ..imagebase 2024 DD $L$SEH_end_bn_mul4x_mont_gather5 wrt ..imagebase 2025 DD $L$SEH_info_bn_mul4x_mont_gather5 wrt ..imagebase 2026 2027 DD $L$SEH_begin_bn_power5 wrt ..imagebase 2028 DD $L$SEH_end_bn_power5 wrt ..imagebase 2029 DD $L$SEH_info_bn_power5 wrt ..imagebase 2030 2031 DD $L$SEH_begin_bn_from_mont8x wrt ..imagebase 2032 DD $L$SEH_end_bn_from_mont8x wrt ..imagebase 2033 DD $L$SEH_info_bn_from_mont8x wrt ..imagebase 2034 DD $L$SEH_begin_bn_gather5 wrt ..imagebase 2035 DD $L$SEH_end_bn_gather5 wrt ..imagebase 2036 DD $L$SEH_info_bn_gather5 wrt ..imagebase 2037 2038section .xdata rdata align=8 2039ALIGN 8 2040$L$SEH_info_bn_mul_mont_gather5: 2041DB 9,0,0,0 2042 DD mul_handler wrt ..imagebase 2043 DD $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase 2044ALIGN 8 2045$L$SEH_info_bn_mul4x_mont_gather5: 2046DB 9,0,0,0 2047 DD mul_handler wrt ..imagebase 2048 DD $L$mul4x_body wrt ..imagebase,$L$mul4x_epilogue wrt ..imagebase 2049ALIGN 8 2050$L$SEH_info_bn_power5: 2051DB 9,0,0,0 2052 DD mul_handler wrt ..imagebase 2053 DD $L$power5_body wrt ..imagebase,$L$power5_epilogue wrt ..imagebase 2054ALIGN 8 2055$L$SEH_info_bn_from_mont8x: 2056DB 9,0,0,0 2057 DD mul_handler wrt ..imagebase 2058 DD $L$from_body wrt ..imagebase,$L$from_epilogue wrt ..imagebase 2059ALIGN 8 2060$L$SEH_info_bn_gather5: 2061DB 0x01,0x0d,0x05,0x00 2062DB 0x0d,0x78,0x01,0x00 2063DB 0x08,0x68,0x00,0x00 2064DB 0x04,0x42,0x00,0x00 2065ALIGN 8 2066