;-----------------------------------------------------------------------
; RSAZ 512-bit modular arithmetic helpers.
; Syntax: NASM (Intel operand order).  Target: x86-64, Win64 object.
;
; The exported "big" entry points receive their arguments in the
; Microsoft x64 registers (rcx, rdx, r8, r9, then stack) and immediately
; shuffle them into rdi, rsi, rdx, rcx, r8, r9; rdi/rsi are callee-saved
; on Win64, so they are spilled into the caller's home space first and
; reloaded in the epilogue.
;
; Common stack frame of the five framed entry points (after the prologue):
;   [rsp+0   .. rsp+127]  16 qwords of scratch (double-width product)
;   [rsp+128]             multiplier consumed by __rsaz_512_reduce
;                         (presumably the Montgomery n0 constant)
;   [rsp+136]             rsaz_512_sqr only: remaining iteration count
;   [rsp+152 .. rsp+199]  pushed r15, r14, r13, r12, rbp, rbx
; The internal helpers address this frame through rsp and therefore use
; offsets that are 8 larger (their return address sits on top).
;-----------------------------------------------------------------------
default rel
%define XMMWORD
%define YMMWORD
%define ZMMWORD
section .text code align=64


EXTERN  OPENSSL_ia32cap_P

global  rsaz_512_sqr

;-----------------------------------------------------------------------
; rsaz_512_sqr
; In (after the register shuffle):
;   rdi = output, 8 qwords          rsi = input, 8 qwords
;   rdx = 8-qword operand read by the reduce/subtract helpers
;         (presumably the modulus)
;   rcx = multiplier for __rsaz_512_reduce, kept at [128+rsp]
;   r8d = number of squarings; each pass squares [rsi], reduces, writes
;         [rdi], then continues with rsi = rdi.  The counter is tested
;         only after the first pass, so it is expected to be >= 1.
;
; Each pass multiplies a[i] by a[i+1..7], doubles the cross products on
; the fly (the shifted-out top bit is carried in a spare register) and
; adds a[i]^2.
;
; NOTE(review): after each `mul rax` squaring step the carry out of the
; trailing `adc rN,0` is dropped (the next flag-writing instruction is a
; `shr`).  This code shape looks like the routine that upstream patched
; for CVE-2019-1551 - confirm against the current upstream source before
; relying on this routine.
;-----------------------------------------------------------------------
ALIGN   32
rsaz_512_sqr:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_rsaz_512_sqr:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]


        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        sub     rsp,128+24
$L$sqr_body:
        mov     rbp,rdx                 ; rbp = operand for reduce/subtract
        mov     rdx,QWORD[rsi]          ; rdx = a[0]
        mov     rax,QWORD[8+rsi]        ; rax = a[1]
        mov     QWORD[128+rsp],rcx      ; multiplier for __rsaz_512_reduce
        jmp     NEAR $L$oop_sqr

ALIGN   32
$L$oop_sqr:
        mov     DWORD[((128+8))+rsp],r8d ; spill remaining iteration count

        ; pass 1: r8..r15 = a[0] * a[1..7]
        mov     rbx,rdx
        mul     rdx
        mov     r8,rax
        mov     rax,QWORD[16+rsi]
        mov     r9,rdx

        mul     rbx
        add     r9,rax
        mov     rax,QWORD[24+rsi]
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        add     r10,rax
        mov     rax,QWORD[32+rsi]
        mov     r11,rdx
        adc     r11,0

        mul     rbx
        add     r11,rax
        mov     rax,QWORD[40+rsi]
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r12,rax
        mov     rax,QWORD[48+rsi]
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        add     r13,rax
        mov     rax,QWORD[56+rsi]
        mov     r14,rdx
        adc     r14,0

        mul     rbx
        add     r14,rax
        mov     rax,rbx
        mov     r15,rdx
        adc     r15,0

        ; double r8:r9, keep old r9 in rcx so its top bit can be carried on
        add     r8,r8
        mov     rcx,r9
        adc     r9,r9

        mul     rax                     ; a[0]^2
        mov     QWORD[rsp],rax
        add     r8,rdx
        adc     r9,0

        mov     QWORD[8+rsp],r8
        shr     rcx,63


        ; pass 2: accumulate a[1] * a[2..7]
        mov     r8,QWORD[8+rsi]
        mov     rax,QWORD[16+rsi]
        mul     r8
        add     r10,rax
        mov     rax,QWORD[24+rsi]
        mov     rbx,rdx
        adc     rbx,0

        mul     r8
        add     r11,rax
        mov     rax,QWORD[32+rsi]
        adc     rdx,0
        add     r11,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r8
        add     r12,rax
        mov     rax,QWORD[40+rsi]
        adc     rdx,0
        add     r12,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r8
        add     r13,rax
        mov     rax,QWORD[48+rsi]
        adc     rdx,0
        add     r13,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r8
        add     r14,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        add     r14,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r8
        add     r15,rax
        mov     rax,r8
        adc     rdx,0
        add     r15,rbx
        mov     r8,rdx
        mov     rdx,r10
        adc     r8,0

        add     rdx,rdx                 ; CF = top bit of r10
        lea     r10,[r10*2+rcx]         ; double r10, pull in carried bit
        mov     rbx,r11
        adc     r11,r11

        mul     rax                     ; a[1]^2
        add     r9,rax
        adc     r10,rdx
        adc     r11,0

        mov     QWORD[16+rsp],r9
        mov     QWORD[24+rsp],r10
        shr     rbx,63


        ; pass 3: accumulate a[2] * a[3..7]
        mov     r9,QWORD[16+rsi]
        mov     rax,QWORD[24+rsi]
        mul     r9
        add     r12,rax
        mov     rax,QWORD[32+rsi]
        mov     rcx,rdx
        adc     rcx,0

        mul     r9
        add     r13,rax
        mov     rax,QWORD[40+rsi]
        adc     rdx,0
        add     r13,rcx
        mov     rcx,rdx
        adc     rcx,0

        mul     r9
        add     r14,rax
        mov     rax,QWORD[48+rsi]
        adc     rdx,0
        add     r14,rcx
        mov     rcx,rdx
        adc     rcx,0

        mul     r9
        mov     r10,r12
        lea     r12,[r12*2+rbx]
        add     r15,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        add     r15,rcx
        mov     rcx,rdx
        adc     rcx,0

        mul     r9
        shr     r10,63
        add     r8,rax
        mov     rax,r9
        adc     rdx,0
        add     r8,rcx
        mov     r9,rdx
        adc     r9,0

        mov     rcx,r13
        lea     r13,[r13*2+r10]

        mul     rax                     ; a[2]^2
        add     r11,rax
        adc     r12,rdx
        adc     r13,0

        mov     QWORD[32+rsp],r11
        mov     QWORD[40+rsp],r12
        shr     rcx,63


        ; pass 4: accumulate a[3] * a[4..7]
        mov     r10,QWORD[24+rsi]
        mov     rax,QWORD[32+rsi]
        mul     r10
        add     r14,rax
        mov     rax,QWORD[40+rsi]
        mov     rbx,rdx
        adc     rbx,0

        mul     r10
        add     r15,rax
        mov     rax,QWORD[48+rsi]
        adc     rdx,0
        add     r15,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r10
        mov     r12,r14
        lea     r14,[r14*2+rcx]
        add     r8,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        add     r8,rbx
        mov     rbx,rdx
        adc     rbx,0

        mul     r10
        shr     r12,63
        add     r9,rax
        mov     rax,r10
        adc     rdx,0
        add     r9,rbx
        mov     r10,rdx
        adc     r10,0

        mov     rbx,r15
        lea     r15,[r15*2+r12]

        mul     rax                     ; a[3]^2
        add     r13,rax
        adc     r14,rdx
        adc     r15,0

        mov     QWORD[48+rsp],r13
        mov     QWORD[56+rsp],r14
        shr     rbx,63


        ; pass 5: accumulate a[4] * a[5..7]
        mov     r11,QWORD[32+rsi]
        mov     rax,QWORD[40+rsi]
        mul     r11
        add     r8,rax
        mov     rax,QWORD[48+rsi]
        mov     rcx,rdx
        adc     rcx,0

        mul     r11
        add     r9,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        mov     r12,r8
        lea     r8,[r8*2+rbx]
        add     r9,rcx
        mov     rcx,rdx
        adc     rcx,0

        mul     r11
        shr     r12,63
        add     r10,rax
        mov     rax,r11
        adc     rdx,0
        add     r10,rcx
        mov     r11,rdx
        adc     r11,0

        mov     rcx,r9
        lea     r9,[r9*2+r12]

        mul     rax                     ; a[4]^2
        add     r15,rax
        adc     r8,rdx
        adc     r9,0

        mov     QWORD[64+rsp],r15
        mov     QWORD[72+rsp],r8
        shr     rcx,63


        ; pass 6: accumulate a[5] * a[6..7]
        mov     r12,QWORD[40+rsi]
        mov     rax,QWORD[48+rsi]
        mul     r12
        add     r10,rax
        mov     rax,QWORD[56+rsi]
        mov     rbx,rdx
        adc     rbx,0

        mul     r12
        add     r11,rax
        mov     rax,r12
        mov     r15,r10
        lea     r10,[r10*2+rcx]
        adc     rdx,0
        shr     r15,63
        add     r11,rbx
        mov     r12,rdx
        adc     r12,0

        mov     rbx,r11
        lea     r11,[r11*2+r15]

        mul     rax                     ; a[5]^2
        add     r9,rax
        adc     r10,rdx
        adc     r11,0

        mov     QWORD[80+rsp],r9
        mov     QWORD[88+rsp],r10


        ; pass 7: a[6] * a[7]
        mov     r13,QWORD[48+rsi]
        mov     rax,QWORD[56+rsi]
        mul     r13
        add     r12,rax
        mov     rax,r13
        mov     r13,rdx
        adc     r13,0

        xor     r14,r14
        shl     rbx,1                   ; CF = top bit of the saved r11
        adc     r12,r12
        adc     r13,r13
        adc     r14,r14

        mul     rax                     ; a[6]^2
        add     r11,rax
        adc     r12,rdx
        adc     r13,0

        mov     QWORD[96+rsp],r11
        mov     QWORD[104+rsp],r12


        ; pass 8: a[7]^2
        mov     rax,QWORD[56+rsi]
        mul     rax
        add     r13,rax
        adc     rdx,0

        add     r14,rdx

        mov     QWORD[112+rsp],r13
        mov     QWORD[120+rsp],r14

        ; reduce the low 8 qwords, then fold in the high 8 qwords
        mov     r8,QWORD[rsp]
        mov     r9,QWORD[8+rsp]
        mov     r10,QWORD[16+rsp]
        mov     r11,QWORD[24+rsp]
        mov     r12,QWORD[32+rsp]
        mov     r13,QWORD[40+rsp]
        mov     r14,QWORD[48+rsp]
        mov     r15,QWORD[56+rsp]

        call    __rsaz_512_reduce

        add     r8,QWORD[64+rsp]
        adc     r9,QWORD[72+rsp]
        adc     r10,QWORD[80+rsp]
        adc     r11,QWORD[88+rsp]
        adc     r12,QWORD[96+rsp]
        adc     r13,QWORD[104+rsp]
        adc     r14,QWORD[112+rsp]
        adc     r15,QWORD[120+rsp]
        sbb     rcx,rcx                 ; rcx = all-ones if the sum carried out

        call    __rsaz_512_subtract

        mov     rdx,r8                  ; next pass: a[0]
        mov     rax,r9                  ; next pass: a[1]
        mov     r8d,DWORD[((128+8))+rsp]
        mov     rsi,rdi                 ; next pass squares the value just stored

        dec     r8d
        jnz     NEAR $L$oop_sqr

        lea     rax,[((128+24+48))+rsp] ; rax = rsp as it was at entry
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$sqr_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_rsaz_512_sqr:
global  rsaz_512_mul

;-----------------------------------------------------------------------
; rsaz_512_mul
; In (after the register shuffle):
;   rdi = output (8 qwords)   rsi = first factor   rdx = second factor
;   rcx = operand for reduce/subtract (presumably the modulus)
;   r8  = multiplier for __rsaz_512_reduce, kept at [128+rsp]
; rdi and rcx are parked in xmm0/xmm1 across __rsaz_512_mul, which
; overwrites rdi and advances rbp.
;-----------------------------------------------------------------------
ALIGN   32
rsaz_512_mul:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_rsaz_512_mul:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]


        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        sub     rsp,128+24
$L$mul_body:
        DB      102,72,15,110,199       ; movq xmm0,rdi
        DB      102,72,15,110,201       ; movq xmm1,rcx
        mov     QWORD[128+rsp],r8
        mov     rbx,QWORD[rdx]          ; first multiplier word
        mov     rbp,rdx
        call    __rsaz_512_mul

        DB      102,72,15,126,199       ; movq rdi,xmm0
        DB      102,72,15,126,205       ; movq rbp,xmm1

        mov     r8,QWORD[rsp]
        mov     r9,QWORD[8+rsp]
        mov     r10,QWORD[16+rsp]
        mov     r11,QWORD[24+rsp]
        mov     r12,QWORD[32+rsp]
        mov     r13,QWORD[40+rsp]
        mov     r14,QWORD[48+rsp]
        mov     r15,QWORD[56+rsp]

        call    __rsaz_512_reduce
        add     r8,QWORD[64+rsp]
        adc     r9,QWORD[72+rsp]
        adc     r10,QWORD[80+rsp]
        adc     r11,QWORD[88+rsp]
        adc     r12,QWORD[96+rsp]
        adc     r13,QWORD[104+rsp]
        adc     r14,QWORD[112+rsp]
        adc     r15,QWORD[120+rsp]
        sbb     rcx,rcx                 ; rcx = all-ones if the sum carried out

        call    __rsaz_512_subtract

        lea     rax,[((128+24+48))+rsp]
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$mul_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_rsaz_512_mul:
global  rsaz_512_mul_gather4

;-----------------------------------------------------------------------
; rsaz_512_mul_gather4
; In (after the register shuffle):
;   rdi = output (8 qwords)   rsi = factor (8 qwords)
;   rdx = table base          rcx = operand for reduce/subtract
;   r8  = multiplier for __rsaz_512_reduce, kept at [128+rsp]
;   r9d = table index (zero-extended below)
; Each 64-bit multiplier word is assembled from two 32-bit table cells at
; base + index*4 and base + index*4 + 64; successive words are 128 bytes
; apart.  The next word is fetched through xmm4/xmm5 while the current
; multiply chain runs.
;
; NOTE(review): the table is read at index-dependent addresses.  If the
; index is secret this is a cache-timing exposure - confirm whether
; callers require a constant-time gather.
;-----------------------------------------------------------------------
ALIGN   32
rsaz_512_mul_gather4:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_rsaz_512_mul_gather4:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]


        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        mov     r9d,r9d                 ; zero-extend the index
        sub     rsp,128+24
$L$mul_gather4_body:
        mov     eax,DWORD[64+r9*4+rdx]  ; high half of multiplier word 0
        DB      102,72,15,110,199       ; movq xmm0,rdi
        mov     ebx,DWORD[r9*4+rdx]     ; low half of multiplier word 0
        DB      102,72,15,110,201       ; movq xmm1,rcx
        mov     QWORD[128+rsp],r8

        shl     rax,32
        or      rbx,rax
        mov     rax,QWORD[rsi]
        mov     rcx,QWORD[8+rsi]
        lea     rbp,[128+r9*4+rdx]      ; rbp -> cells of multiplier word 1
        mul     rbx
        mov     QWORD[rsp],rax
        mov     rax,rcx
        mov     r8,rdx

        mul     rbx
        movd    xmm4,DWORD[rbp]
        add     r8,rax
        mov     rax,QWORD[16+rsi]
        mov     r9,rdx
        adc     r9,0

        mul     rbx
        movd    xmm5,DWORD[64+rbp]
        add     r9,rax
        mov     rax,QWORD[24+rsi]
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        pslldq  xmm5,4
        add     r10,rax
        mov     rax,QWORD[32+rsi]
        mov     r11,rdx
        adc     r11,0

        mul     rbx
        por     xmm4,xmm5
        add     r11,rax
        mov     rax,QWORD[40+rsi]
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r12,rax
        mov     rax,QWORD[48+rsi]
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        lea     rbp,[128+rbp]
        add     r13,rax
        mov     rax,QWORD[56+rsi]
        mov     r14,rdx
        adc     r14,0

        mul     rbx
        DB      102,72,15,126,227       ; movq rbx,xmm4 (next multiplier word)
        add     r14,rax
        mov     rax,QWORD[rsi]
        mov     r15,rdx
        adc     r15,0

        lea     rdi,[8+rsp]
        mov     ecx,7
        jmp     NEAR $L$oop_mul_gather

ALIGN   32
$L$oop_mul_gather:
        mul     rbx
        add     r8,rax
        mov     rax,QWORD[8+rsi]
        mov     QWORD[rdi],r8
        mov     r8,rdx
        adc     r8,0

        mul     rbx
        movd    xmm4,DWORD[rbp]
        add     r9,rax
        mov     rax,QWORD[16+rsi]
        adc     rdx,0
        add     r8,r9
        mov     r9,rdx
        adc     r9,0

        mul     rbx
        movd    xmm5,DWORD[64+rbp]
        add     r10,rax
        mov     rax,QWORD[24+rsi]
        adc     rdx,0
        add     r9,r10
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        pslldq  xmm5,4
        add     r11,rax
        mov     rax,QWORD[32+rsi]
        adc     rdx,0
        add     r10,r11
        mov     r11,rdx
        adc     r11,0

        mul     rbx
        por     xmm4,xmm5
        add     r12,rax
        mov     rax,QWORD[40+rsi]
        adc     rdx,0
        add     r11,r12
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r13,rax
        mov     rax,QWORD[48+rsi]
        adc     rdx,0
        add     r12,r13
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        add     r14,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        add     r13,r14
        mov     r14,rdx
        adc     r14,0

        mul     rbx
        DB      102,72,15,126,227       ; movq rbx,xmm4 (next multiplier word)
        add     r15,rax
        mov     rax,QWORD[rsi]
        adc     rdx,0
        add     r14,r15
        mov     r15,rdx
        adc     r15,0

        lea     rbp,[128+rbp]
        lea     rdi,[8+rdi]

        dec     ecx
        jnz     NEAR $L$oop_mul_gather

        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        DB      102,72,15,126,199       ; movq rdi,xmm0
        DB      102,72,15,126,205       ; movq rbp,xmm1

        mov     r8,QWORD[rsp]
        mov     r9,QWORD[8+rsp]
        mov     r10,QWORD[16+rsp]
        mov     r11,QWORD[24+rsp]
        mov     r12,QWORD[32+rsp]
        mov     r13,QWORD[40+rsp]
        mov     r14,QWORD[48+rsp]
        mov     r15,QWORD[56+rsp]

        call    __rsaz_512_reduce
        add     r8,QWORD[64+rsp]
        adc     r9,QWORD[72+rsp]
        adc     r10,QWORD[80+rsp]
        adc     r11,QWORD[88+rsp]
        adc     r12,QWORD[96+rsp]
        adc     r13,QWORD[104+rsp]
        adc     r14,QWORD[112+rsp]
        adc     r15,QWORD[120+rsp]
        sbb     rcx,rcx                 ; rcx = all-ones if the sum carried out

        call    __rsaz_512_subtract

        lea     rax,[((128+24+48))+rsp]
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$mul_gather4_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_rsaz_512_mul_gather4:
global  rsaz_512_mul_scatter4

;-----------------------------------------------------------------------
; rsaz_512_mul_scatter4
; In (after the register shuffle):
;   rdi = in/out value (8 qwords): used as one factor, then overwritten
;   rsi = other factor (8 qwords)
;   rdx = operand for reduce/subtract (presumably the modulus)
;   rcx = multiplier for __rsaz_512_reduce, kept at [128+rsp]
;   r8  = table base          r9d = table index (zero-extended below)
; The reduced product is stored at [rdi] and also scattered into the
; table as 32-bit halves: low halves at table+index*4+128*i, high halves
; 64 bytes after each.
;-----------------------------------------------------------------------
ALIGN   32
rsaz_512_mul_scatter4:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_rsaz_512_mul_scatter4:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9
        mov     r8,QWORD[40+rsp]
        mov     r9,QWORD[48+rsp]


        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        mov     r9d,r9d                 ; zero-extend the index
        sub     rsp,128+24
$L$mul_scatter4_body:
        lea     r8,[r9*4+r8]            ; r8 = table + index*4
        DB      102,72,15,110,199       ; movq xmm0,rdi
        DB      102,72,15,110,202       ; movq xmm1,rdx
        DB      102,73,15,110,208       ; movq xmm2,r8
        mov     QWORD[128+rsp],rcx

        mov     rbp,rdi
        mov     rbx,QWORD[rdi]
        call    __rsaz_512_mul

        DB      102,72,15,126,199       ; movq rdi,xmm0
        DB      102,72,15,126,205       ; movq rbp,xmm1

        mov     r8,QWORD[rsp]
        mov     r9,QWORD[8+rsp]
        mov     r10,QWORD[16+rsp]
        mov     r11,QWORD[24+rsp]
        mov     r12,QWORD[32+rsp]
        mov     r13,QWORD[40+rsp]
        mov     r14,QWORD[48+rsp]
        mov     r15,QWORD[56+rsp]

        call    __rsaz_512_reduce
        add     r8,QWORD[64+rsp]
        adc     r9,QWORD[72+rsp]
        adc     r10,QWORD[80+rsp]
        adc     r11,QWORD[88+rsp]
        adc     r12,QWORD[96+rsp]
        adc     r13,QWORD[104+rsp]
        adc     r14,QWORD[112+rsp]
        adc     r15,QWORD[120+rsp]
        DB      102,72,15,126,214       ; movq rsi,xmm2 (table slot; leaves CF intact)
        sbb     rcx,rcx                 ; rcx = all-ones if the sum carried out

        call    __rsaz_512_subtract

        mov     DWORD[rsi],r8d
        shr     r8,32
        mov     DWORD[128+rsi],r9d
        shr     r9,32
        mov     DWORD[256+rsi],r10d
        shr     r10,32
        mov     DWORD[384+rsi],r11d
        shr     r11,32
        mov     DWORD[512+rsi],r12d
        shr     r12,32
        mov     DWORD[640+rsi],r13d
        shr     r13,32
        mov     DWORD[768+rsi],r14d
        shr     r14,32
        mov     DWORD[896+rsi],r15d
        shr     r15,32
        mov     DWORD[64+rsi],r8d
        mov     DWORD[192+rsi],r9d
        mov     DWORD[320+rsi],r10d
        mov     DWORD[448+rsi],r11d
        mov     DWORD[576+rsi],r12d
        mov     DWORD[704+rsi],r13d
        mov     DWORD[832+rsi],r14d
        mov     DWORD[960+rsi],r15d

        lea     rax,[((128+24+48))+rsp]
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$mul_scatter4_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_rsaz_512_mul_scatter4:
global  rsaz_512_mul_by_one

;-----------------------------------------------------------------------
; rsaz_512_mul_by_one
; In (after the register shuffle):
;   rdi = output (8 qwords)   rsi = input (8 qwords)
;   rdx = operand for the reduce helper (presumably the modulus)
;   rcx = multiplier for __rsaz_512_reduce, kept at [128+rsp]
; Runs __rsaz_512_reduce on the input and stores the result; no final
; conditional subtraction is performed here.
;-----------------------------------------------------------------------
ALIGN   32
rsaz_512_mul_by_one:
        mov     QWORD[8+rsp],rdi        ;WIN64 prologue
        mov     QWORD[16+rsp],rsi
        mov     rax,rsp
$L$SEH_begin_rsaz_512_mul_by_one:
        mov     rdi,rcx
        mov     rsi,rdx
        mov     rdx,r8
        mov     rcx,r9


        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15

        sub     rsp,128+24
$L$mul_by_one_body:
        mov     rbp,rdx
        mov     QWORD[128+rsp],rcx

        mov     r8,QWORD[rsi]
        pxor    xmm0,xmm0
        mov     r9,QWORD[8+rsi]
        mov     r10,QWORD[16+rsi]
        mov     r11,QWORD[24+rsi]
        mov     r12,QWORD[32+rsi]
        mov     r13,QWORD[40+rsi]
        mov     r14,QWORD[48+rsi]
        mov     r15,QWORD[56+rsi]

        ; zero the first 112 bytes of scratch; movdqa needs rsp 16-byte
        ; aligned (entry rsp = 8 mod 16, minus 48 pushed, minus 152)
        movdqa  XMMWORD[rsp],xmm0
        movdqa  XMMWORD[16+rsp],xmm0
        movdqa  XMMWORD[32+rsp],xmm0
        movdqa  XMMWORD[48+rsp],xmm0
        movdqa  XMMWORD[64+rsp],xmm0
        movdqa  XMMWORD[80+rsp],xmm0
        movdqa  XMMWORD[96+rsp],xmm0
        call    __rsaz_512_reduce
        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        lea     rax,[((128+24+48))+rsp]
        mov     r15,QWORD[((-48))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     rbx,QWORD[((-8))+rax]
        lea     rsp,[rax]
$L$mul_by_one_epilogue:
        mov     rdi,QWORD[8+rsp]        ;WIN64 epilogue
        mov     rsi,QWORD[16+rsp]
        DB      0F3h,0C3h               ;repret
$L$SEH_end_rsaz_512_mul_by_one:

;-----------------------------------------------------------------------
; __rsaz_512_reduce (internal, custom register convention)
; In:    r8..r15 = 8-qword value, rbp -> 8-qword operand,
;        [128+8+rsp] = multiplier (the caller's [128+rsp] slot)
; Out:   r8..r15 = value after 8 reduction rounds
; Clobb: rax, rbx, rcx, rdx, rsi, flags
; Each round picks rbx = r8 * multiplier, adds rbx * [rbp] and shifts the
; value down by one qword (presumably word-wise Montgomery reduction).
;-----------------------------------------------------------------------
ALIGN   32
__rsaz_512_reduce:
        mov     rbx,r8
        imul    rbx,QWORD[((128+8))+rsp]
        mov     rax,QWORD[rbp]
        mov     ecx,8
        jmp     NEAR $L$reduction_loop

ALIGN   32
$L$reduction_loop:
        mul     rbx
        mov     rax,QWORD[8+rbp]
        neg     r8                      ; CF = (r8 != 0); stands in for the
                                        ; carry out of the discarded low word
        mov     r8,rdx
        adc     r8,0

        mul     rbx
        add     r9,rax
        mov     rax,QWORD[16+rbp]
        adc     rdx,0
        add     r8,r9
        mov     r9,rdx
        adc     r9,0

        mul     rbx
        add     r10,rax
        mov     rax,QWORD[24+rbp]
        adc     rdx,0
        add     r9,r10
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        add     r11,rax
        mov     rax,QWORD[32+rbp]
        adc     rdx,0
        add     r10,r11
        mov     rsi,QWORD[((128+8))+rsp] ; start computing the next round's rbx


        adc     rdx,0
        mov     r11,rdx

        mul     rbx
        add     r12,rax
        mov     rax,QWORD[40+rbp]
        adc     rdx,0
        imul    rsi,r8                  ; r8 is already final for this round
        add     r11,r12
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r13,rax
        mov     rax,QWORD[48+rbp]
        adc     rdx,0
        add     r12,r13
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        add     r14,rax
        mov     rax,QWORD[56+rbp]
        adc     rdx,0
        add     r13,r14
        mov     r14,rdx
        adc     r14,0

        mul     rbx
        mov     rbx,rsi
        add     r15,rax
        mov     rax,QWORD[rbp]
        adc     rdx,0
        add     r14,r15
        mov     r15,rdx
        adc     r15,0

        dec     ecx
        jne     NEAR $L$reduction_loop

        DB      0F3h,0C3h               ;repret


;-----------------------------------------------------------------------
; __rsaz_512_subtract (internal)
; In:    r8..r15 = value, rdi -> output, rbp -> 8-qword operand,
;        rcx = mask (0 or all-ones)
; Out:   [rdi] and r8..r15 = value + (mask & -operand)
; The negation is built as `neg` on word 0 and `not` on words 1..7, which
; equals the two's complement only when operand word 0 is non-zero
; (true for any odd operand).
;-----------------------------------------------------------------------
ALIGN   32
__rsaz_512_subtract:
        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        mov     r8,QWORD[rbp]
        mov     r9,QWORD[8+rbp]
        neg     r8
        not     r9
        and     r8,rcx
        mov     r10,QWORD[16+rbp]
        and     r9,rcx
        not     r10
        mov     r11,QWORD[24+rbp]
        and     r10,rcx
        not     r11
        mov     r12,QWORD[32+rbp]
        and     r11,rcx
        not     r12
        mov     r13,QWORD[40+rbp]
        and     r12,rcx
        not     r13
        mov     r14,QWORD[48+rbp]
        and     r13,rcx
        not     r14
        mov     r15,QWORD[56+rbp]
        and     r14,rcx
        not     r15
        and     r15,rcx

        add     r8,QWORD[rdi]
        adc     r9,QWORD[8+rdi]
        adc     r10,QWORD[16+rdi]
        adc     r11,QWORD[24+rdi]
        adc     r12,QWORD[32+rdi]
        adc     r13,QWORD[40+rdi]
        adc     r14,QWORD[48+rdi]
        adc     r15,QWORD[56+rdi]

        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        DB      0F3h,0C3h               ;repret


;-----------------------------------------------------------------------
; __rsaz_512_mul (internal)
; In:    rsi -> 8-qword factor, rbp -> 8-qword multiplier,
;        rbx = first multiplier word ([rbp])
; Out:   16-qword product written to the caller's scratch area
;        ([8+rsp] here is the caller's [rsp])
; Clobb: rax, rbx, rcx, rdx, rdi, rbp, r8..r15, flags
;-----------------------------------------------------------------------
ALIGN   32
__rsaz_512_mul:
        lea     rdi,[8+rsp]

        mov     rax,QWORD[rsi]
        mul     rbx
        mov     QWORD[rdi],rax
        mov     rax,QWORD[8+rsi]
        mov     r8,rdx

        mul     rbx
        add     r8,rax
        mov     rax,QWORD[16+rsi]
        mov     r9,rdx
        adc     r9,0

        mul     rbx
        add     r9,rax
        mov     rax,QWORD[24+rsi]
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        add     r10,rax
        mov     rax,QWORD[32+rsi]
        mov     r11,rdx
        adc     r11,0

        mul     rbx
        add     r11,rax
        mov     rax,QWORD[40+rsi]
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r12,rax
        mov     rax,QWORD[48+rsi]
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        add     r13,rax
        mov     rax,QWORD[56+rsi]
        mov     r14,rdx
        adc     r14,0

        mul     rbx
        add     r14,rax
        mov     rax,QWORD[rsi]
        mov     r15,rdx
        adc     r15,0

        lea     rbp,[8+rbp]
        lea     rdi,[8+rdi]

        mov     ecx,7
        jmp     NEAR $L$oop_mul

ALIGN   32
$L$oop_mul:
        mov     rbx,QWORD[rbp]
        mul     rbx
        add     r8,rax
        mov     rax,QWORD[8+rsi]
        mov     QWORD[rdi],r8
        mov     r8,rdx
        adc     r8,0

        mul     rbx
        add     r9,rax
        mov     rax,QWORD[16+rsi]
        adc     rdx,0
        add     r8,r9
        mov     r9,rdx
        adc     r9,0

        mul     rbx
        add     r10,rax
        mov     rax,QWORD[24+rsi]
        adc     rdx,0
        add     r9,r10
        mov     r10,rdx
        adc     r10,0

        mul     rbx
        add     r11,rax
        mov     rax,QWORD[32+rsi]
        adc     rdx,0
        add     r10,r11
        mov     r11,rdx
        adc     r11,0

        mul     rbx
        add     r12,rax
        mov     rax,QWORD[40+rsi]
        adc     rdx,0
        add     r11,r12
        mov     r12,rdx
        adc     r12,0

        mul     rbx
        add     r13,rax
        mov     rax,QWORD[48+rsi]
        adc     rdx,0
        add     r12,r13
        mov     r13,rdx
        adc     r13,0

        mul     rbx
        add     r14,rax
        mov     rax,QWORD[56+rsi]
        adc     rdx,0
        add     r13,r14
        mov     r14,rdx
        lea     rbp,[8+rbp]             ; lea leaves CF intact for the adc below
        adc     r14,0

        mul     rbx
        add     r15,rax
        mov     rax,QWORD[rsi]
        adc     rdx,0
        add     r14,r15
        mov     r15,rdx
        adc     r15,0

        lea     rdi,[8+rdi]

        dec     ecx
        jnz     NEAR $L$oop_mul

        mov     QWORD[rdi],r8
        mov     QWORD[8+rdi],r9
        mov     QWORD[16+rdi],r10
        mov     QWORD[24+rdi],r11
        mov     QWORD[32+rdi],r12
        mov     QWORD[40+rdi],r13
        mov     QWORD[48+rdi],r14
        mov     QWORD[56+rdi],r15

        DB      0F3h,0C3h               ;repret

global  rsaz_512_scatter4

;-----------------------------------------------------------------------
; rsaz_512_scatter4 (leaf; uses the Win64 argument registers directly)
; In:    rcx = table base, rdx -> 8-qword value, r8d = table index
; Writes each qword as two 32-bit cells: low half at
; table + index*4 + 128*i, high half 64 bytes after it.
; Clobb: rax, rcx, rdx, r8, r9, flags
; The index is zero-extended first: the upper half of a register holding
; a 32-bit argument is not guaranteed to be zero on Win64, and the
; framed siblings (`mov r9d,r9d`) already do the same.  Presumably the C
; prototype declares the index as a 32-bit int - confirm.
;-----------------------------------------------------------------------
ALIGN   16
rsaz_512_scatter4:
        mov     r8d,r8d                 ; zero-extend the index
        lea     rcx,[r8*4+rcx]
        mov     r9d,8
        jmp     NEAR $L$oop_scatter
ALIGN   16
$L$oop_scatter:
        mov     rax,QWORD[rdx]
        lea     rdx,[8+rdx]
        mov     DWORD[rcx],eax
        shr     rax,32
        mov     DWORD[64+rcx],eax
        lea     rcx,[128+rcx]
        dec     r9d
        jnz     NEAR $L$oop_scatter
        DB      0F3h,0C3h               ;repret


global  rsaz_512_gather4

;-----------------------------------------------------------------------
; rsaz_512_gather4 (leaf; uses the Win64 argument registers directly)
; In:    rcx -> 8-qword output, rdx = table base, r8d = table index
; Inverse of rsaz_512_scatter4: rebuilds each qword from the 32-bit cells
; at table + index*4 + 128*i and 64 bytes after it.
; Clobb: rax, rcx, rdx, r8, r9, flags
; The index is zero-extended first, for the same reason as in
; rsaz_512_scatter4.
; NOTE(review): the read addresses depend on the index - see the note on
; rsaz_512_mul_gather4.
;-----------------------------------------------------------------------
ALIGN   16
rsaz_512_gather4:
        mov     r8d,r8d                 ; zero-extend the index
        lea     rdx,[r8*4+rdx]
        mov     r9d,8
        jmp     NEAR $L$oop_gather
ALIGN   16
$L$oop_gather:
        mov     eax,DWORD[rdx]          ; 32-bit load zero-extends into rax
        mov     r8d,DWORD[64+rdx]
        lea     rdx,[128+rdx]
        shl     r8,32
        or      rax,r8
        mov     QWORD[rcx],rax
        lea     rcx,[8+rcx]
        dec     r9d
        jnz     NEAR $L$oop_gather
        DB      0F3h,0C3h               ;repret

EXTERN  __imp_RtlVirtualUnwind

;-----------------------------------------------------------------------
; se_handler - Win64 language-specific exception handler shared by the
; five framed entry points (referenced from .xdata below).
; In:  r8 = CONTEXT record, r9 = DISPATCHER_CONTEXT (rcx/rdx are unused).
; HandlerData[0]/[1] are the image-relative body and epilogue labels of
; the function.  If the faulting Rip lies between them, the frame is
; fully set up, so the saved non-volatile registers are recovered from
; the stack.  In every case rdi/rsi are recovered from the home space,
; the context is copied to the dispatcher's record and RtlVirtualUnwind
; continues the unwind.  Field offsets follow the Win64 CONTEXT and
; DISPATCHER_CONTEXT layouts.
; Out: eax = 1 (ExceptionContinueSearch)
;-----------------------------------------------------------------------
ALIGN   16
se_handler:
        push    rsi
        push    rdi
        push    rbx
        push    rbp
        push    r12
        push    r13
        push    r14
        push    r15
        pushfq
        sub     rsp,64

        mov     rax,QWORD[120+r8]       ; context->Rax
        mov     rbx,QWORD[248+r8]       ; context->Rip

        mov     rsi,QWORD[8+r9]         ; disp->ImageBase
        mov     r11,QWORD[56+r9]        ; disp->HandlerData

        mov     r10d,DWORD[r11]         ; HandlerData[0]: body label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jb      NEAR $L$common_seh_tail ; still in prologue: rax = entry rsp

        mov     rax,QWORD[152+r8]       ; context->Rsp

        mov     r10d,DWORD[4+r11]       ; HandlerData[1]: epilogue label
        lea     r10,[r10*1+rsi]
        cmp     rbx,r10
        jae     NEAR $L$common_seh_tail ; in epilogue: registers already restored

        lea     rax,[((128+24+48))+rax] ; step over scratch area and pushes

        mov     rbx,QWORD[((-8))+rax]
        mov     rbp,QWORD[((-16))+rax]
        mov     r12,QWORD[((-24))+rax]
        mov     r13,QWORD[((-32))+rax]
        mov     r14,QWORD[((-40))+rax]
        mov     r15,QWORD[((-48))+rax]
        mov     QWORD[144+r8],rbx       ; context->Rbx
        mov     QWORD[160+r8],rbp       ; context->Rbp
        mov     QWORD[216+r8],r12       ; context->R12
        mov     QWORD[224+r8],r13       ; context->R13
        mov     QWORD[232+r8],r14       ; context->R14
        mov     QWORD[240+r8],r15       ; context->R15

$L$common_seh_tail:
        mov     rdi,QWORD[8+rax]
        mov     rsi,QWORD[16+rax]
        mov     QWORD[152+r8],rax       ; context->Rsp
        mov     QWORD[168+r8],rsi       ; context->Rsi
        mov     QWORD[176+r8],rdi       ; context->Rdi

        mov     rdi,QWORD[40+r9]        ; disp->ContextRecord
        mov     rsi,r8
        mov     ecx,154                 ; CONTEXT size in qwords
        DD      0xa548f3fc              ; cld; rep movsq

        mov     rsi,r9
        xor     rcx,rcx                 ; arg1: UNW_FLAG_NHANDLER
        mov     rdx,QWORD[8+rsi]        ; arg2: disp->ImageBase
        mov     r8,QWORD[rsi]           ; arg3: disp->ControlPc
        mov     r9,QWORD[16+rsi]        ; arg4: disp->FunctionEntry
        mov     r10,QWORD[40+rsi]       ; disp->ContextRecord
        lea     r11,[56+rsi]            ; &disp->HandlerData
        lea     r12,[24+rsi]            ; &disp->EstablisherFrame
        mov     QWORD[32+rsp],r10       ; arg5
        mov     QWORD[40+rsp],r11       ; arg6
        mov     QWORD[48+rsp],r12       ; arg7
        mov     QWORD[56+rsp],rcx       ; arg8 (NULL)
        call    QWORD[__imp_RtlVirtualUnwind]

        mov     eax,1                   ; ExceptionContinueSearch
        add     rsp,64
        popfq
        pop     r15
        pop     r14
        pop     r13
        pop     r12
        pop     rbp
        pop     rbx
        pop     rdi
        pop     rsi
        DB      0F3h,0C3h               ;repret


; Function table: begin / end / unwind-info RVAs for each framed entry.
section .pdata rdata align=4
ALIGN   4
        DD      $L$SEH_begin_rsaz_512_sqr wrt ..imagebase
        DD      $L$SEH_end_rsaz_512_sqr wrt ..imagebase
        DD      $L$SEH_info_rsaz_512_sqr wrt ..imagebase

        DD      $L$SEH_begin_rsaz_512_mul wrt ..imagebase
        DD      $L$SEH_end_rsaz_512_mul wrt ..imagebase
        DD      $L$SEH_info_rsaz_512_mul wrt ..imagebase

        DD      $L$SEH_begin_rsaz_512_mul_gather4 wrt ..imagebase
        DD      $L$SEH_end_rsaz_512_mul_gather4 wrt ..imagebase
        DD      $L$SEH_info_rsaz_512_mul_gather4 wrt ..imagebase

        DD      $L$SEH_begin_rsaz_512_mul_scatter4 wrt ..imagebase
        DD      $L$SEH_end_rsaz_512_mul_scatter4 wrt ..imagebase
        DD      $L$SEH_info_rsaz_512_mul_scatter4 wrt ..imagebase

        DD      $L$SEH_begin_rsaz_512_mul_by_one wrt ..imagebase
        DD      $L$SEH_end_rsaz_512_mul_by_one wrt ..imagebase
        DD      $L$SEH_info_rsaz_512_mul_by_one wrt ..imagebase

; Unwind info: header byte 9 = version 1 with UNW_FLAG_EHANDLER, no
; unwind codes; then the handler RVA and its HandlerData pair
; (body label, epilogue label) consumed by se_handler.
section .xdata rdata align=8
ALIGN   8
$L$SEH_info_rsaz_512_sqr:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$sqr_body wrt ..imagebase,$L$sqr_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$mul_body wrt ..imagebase,$L$mul_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_gather4:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$mul_gather4_body wrt ..imagebase,$L$mul_gather4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_scatter4:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$mul_scatter4_body wrt ..imagebase,$L$mul_scatter4_epilogue wrt ..imagebase
$L$SEH_info_rsaz_512_mul_by_one:
        DB      9,0,0,0
        DD      se_handler wrt ..imagebase
        DD      $L$mul_by_one_body wrt ..imagebase,$L$mul_by_one_epilogue wrt ..imagebase