1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11.text 12.extern GFp_ia32cap_P 13.hidden GFp_ia32cap_P 14 15 16.align 64 17.Lpoly: 18.quad 0xffffffffffffffff, 0x00000000ffffffff, 0x0000000000000000, 0xffffffff00000001 19 20.LOne: 21.long 1,1,1,1,1,1,1,1 22.LTwo: 23.long 2,2,2,2,2,2,2,2 24.LThree: 25.long 3,3,3,3,3,3,3,3 26.LONE_mont: 27.quad 0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe 28 29 30.Lord: 31.quad 0xf3b9cac2fc632551, 0xbce6faada7179e84, 0xffffffffffffffff, 0xffffffff00000000 32.LordK: 33.quad 0xccd1c8aaee00bc4f 34 35 36 37.globl GFp_nistz256_add 38.hidden GFp_nistz256_add 39.type GFp_nistz256_add,@function 40.align 32 41GFp_nistz256_add: 42 pushq %r12 43 pushq %r13 44 45 movq 0(%rsi),%r8 46 xorq %r13,%r13 47 movq 8(%rsi),%r9 48 movq 16(%rsi),%r10 49 movq 24(%rsi),%r11 50 leaq .Lpoly(%rip),%rsi 51 52 addq 0(%rdx),%r8 53 adcq 8(%rdx),%r9 54 movq %r8,%rax 55 adcq 16(%rdx),%r10 56 adcq 24(%rdx),%r11 57 movq %r9,%rdx 58 adcq $0,%r13 59 60 subq 0(%rsi),%r8 61 movq %r10,%rcx 62 sbbq 8(%rsi),%r9 63 sbbq 16(%rsi),%r10 64 movq %r11,%r12 65 sbbq 24(%rsi),%r11 66 sbbq $0,%r13 67 68 cmovcq %rax,%r8 69 cmovcq %rdx,%r9 70 movq %r8,0(%rdi) 71 cmovcq %rcx,%r10 72 movq %r9,8(%rdi) 73 cmovcq %r12,%r11 74 movq %r10,16(%rdi) 75 movq %r11,24(%rdi) 76 77 popq %r13 78 popq %r12 79 .byte 0xf3,0xc3 80.size GFp_nistz256_add,.-GFp_nistz256_add 81 82 83 84.globl GFp_nistz256_neg 85.hidden GFp_nistz256_neg 86.type GFp_nistz256_neg,@function 87.align 32 88GFp_nistz256_neg: 89.cfi_startproc 90 pushq %r12 91.cfi_adjust_cfa_offset 8 92.cfi_offset %r12,-16 93 pushq %r13 94.cfi_adjust_cfa_offset 8 95.cfi_offset %r13,-24 96.Lneg_body: 97 98 xorq %r8,%r8 99 xorq %r9,%r9 100 xorq %r10,%r10 101 xorq %r11,%r11 102 xorq %r13,%r13 103 104 subq 0(%rsi),%r8 105 sbbq 8(%rsi),%r9 106 sbbq 16(%rsi),%r10 107 movq %r8,%rax 108 sbbq 24(%rsi),%r11 109 leaq .Lpoly(%rip),%rsi 110 movq %r9,%rdx 111 sbbq $0,%r13 112 113 addq 0(%rsi),%r8 114 movq %r10,%rcx 115 adcq 8(%rsi),%r9 116 adcq 16(%rsi),%r10 117 movq %r11,%r12 118 adcq 24(%rsi),%r11 119 testq %r13,%r13 120 121 cmovzq %rax,%r8 122 cmovzq %rdx,%r9 123 movq %r8,0(%rdi) 124 cmovzq %rcx,%r10 125 movq %r9,8(%rdi) 126 cmovzq %r12,%r11 127 movq %r10,16(%rdi) 128 movq %r11,24(%rdi) 129 130 movq 0(%rsp),%r13 131.cfi_restore %r13 132 movq 8(%rsp),%r12 133.cfi_restore %r12 134 leaq 16(%rsp),%rsp 135.cfi_adjust_cfa_offset -16 136.Lneg_epilogue: 137 .byte 0xf3,0xc3 138.cfi_endproc 139.size GFp_nistz256_neg,.-GFp_nistz256_neg 140 141 142 143 144 145 146.globl GFp_p256_scalar_mul_mont 147.hidden GFp_p256_scalar_mul_mont 148.type GFp_p256_scalar_mul_mont,@function 149.align 32 150GFp_p256_scalar_mul_mont: 151.cfi_startproc 152 leaq GFp_ia32cap_P(%rip),%rcx 153 movq 8(%rcx),%rcx 154 andl $0x80100,%ecx 155 cmpl $0x80100,%ecx 156 je .Lecp_nistz256_ord_mul_montx 157 pushq %rbp 158.cfi_adjust_cfa_offset 8 159.cfi_offset %rbp,-16 160 pushq %rbx 161.cfi_adjust_cfa_offset 8 162.cfi_offset %rbx,-24 163 pushq %r12 164.cfi_adjust_cfa_offset 8 165.cfi_offset %r12,-32 166 pushq %r13 167.cfi_adjust_cfa_offset 8 168.cfi_offset %r13,-40 169 pushq %r14 170.cfi_adjust_cfa_offset 8 171.cfi_offset %r14,-48 172 pushq %r15 173.cfi_adjust_cfa_offset 8 174.cfi_offset %r15,-56 175.Lord_mul_body: 176 177 movq 0(%rdx),%rax 178 movq %rdx,%rbx 179 leaq .Lord(%rip),%r14 180 movq .LordK(%rip),%r15 181 182 183 movq %rax,%rcx 184 mulq 0(%rsi) 185 movq %rax,%r8 186 movq %rcx,%rax 187 movq %rdx,%r9 188 189 mulq 8(%rsi) 190 addq %rax,%r9 191 movq %rcx,%rax 192 adcq $0,%rdx 193 movq %rdx,%r10 194 195 mulq 16(%rsi) 196 addq %rax,%r10 197 movq %rcx,%rax 198 adcq $0,%rdx 199 200 movq %r8,%r13 201 imulq %r15,%r8 202 203 movq %rdx,%r11 204 mulq 24(%rsi) 205 addq %rax,%r11 206 movq %r8,%rax 207 adcq $0,%rdx 208 movq %rdx,%r12 209 210 211 mulq 0(%r14) 212 movq %r8,%rbp 213 addq %rax,%r13 214 movq %r8,%rax 215 adcq $0,%rdx 216 movq %rdx,%rcx 217 218 subq %r8,%r10 219 sbbq $0,%r8 220 221 mulq 8(%r14) 222 addq %rcx,%r9 223 adcq $0,%rdx 224 addq %rax,%r9 225 movq %rbp,%rax 226 adcq %rdx,%r10 227 movq %rbp,%rdx 228 adcq $0,%r8 229 230 shlq $32,%rax 231 shrq $32,%rdx 232 subq %rax,%r11 233 movq 8(%rbx),%rax 234 sbbq %rdx,%rbp 235 236 addq %r8,%r11 237 adcq %rbp,%r12 238 adcq $0,%r13 239 240 241 movq %rax,%rcx 242 mulq 0(%rsi) 243 addq %rax,%r9 244 movq %rcx,%rax 245 adcq $0,%rdx 246 movq %rdx,%rbp 247 248 mulq 8(%rsi) 249 addq %rbp,%r10 250 adcq $0,%rdx 251 addq %rax,%r10 252 movq %rcx,%rax 253 adcq $0,%rdx 254 movq %rdx,%rbp 255 256 mulq 16(%rsi) 257 addq %rbp,%r11 258 adcq $0,%rdx 259 addq %rax,%r11 260 movq %rcx,%rax 261 adcq $0,%rdx 262 263 movq %r9,%rcx 264 imulq %r15,%r9 265 266 movq %rdx,%rbp 267 mulq 24(%rsi) 268 addq %rbp,%r12 269 adcq $0,%rdx 270 xorq %r8,%r8 271 addq %rax,%r12 272 movq %r9,%rax 273 adcq %rdx,%r13 274 adcq $0,%r8 275 276 277 mulq 0(%r14) 278 movq %r9,%rbp 279 addq %rax,%rcx 280 movq %r9,%rax 281 adcq %rdx,%rcx 282 283 subq %r9,%r11 284 sbbq $0,%r9 285 286 mulq 8(%r14) 287 addq %rcx,%r10 288 adcq $0,%rdx 289 addq %rax,%r10 290 movq %rbp,%rax 291 adcq %rdx,%r11 292 movq %rbp,%rdx 293 adcq $0,%r9 294 295 shlq $32,%rax 296 shrq $32,%rdx 297 subq %rax,%r12 298 movq 16(%rbx),%rax 299 sbbq %rdx,%rbp 300 301 addq %r9,%r12 302 adcq %rbp,%r13 303 adcq $0,%r8 304 305 306 movq %rax,%rcx 307 mulq 0(%rsi) 308 addq %rax,%r10 309 movq %rcx,%rax 310 adcq $0,%rdx 311 movq %rdx,%rbp 312 313 mulq 8(%rsi) 314 addq %rbp,%r11 315 adcq $0,%rdx 316 addq %rax,%r11 317 movq %rcx,%rax 318 adcq $0,%rdx 319 movq %rdx,%rbp 320 321 mulq 16(%rsi) 322 addq %rbp,%r12 323 adcq $0,%rdx 324 addq %rax,%r12 325 movq %rcx,%rax 326 adcq $0,%rdx 327 328 movq %r10,%rcx 329 imulq %r15,%r10 330 331 movq %rdx,%rbp 332 mulq 24(%rsi) 333 addq %rbp,%r13 334 adcq $0,%rdx 335 xorq %r9,%r9 336 addq %rax,%r13 337 movq %r10,%rax 338 adcq %rdx,%r8 339 adcq $0,%r9 340 341 342 mulq 0(%r14) 343 movq %r10,%rbp 344 addq %rax,%rcx 345 movq %r10,%rax 346 adcq %rdx,%rcx 347 348 subq %r10,%r12 349 sbbq $0,%r10 350 351 mulq 8(%r14) 352 addq %rcx,%r11 353 adcq $0,%rdx 354 addq %rax,%r11 355 movq %rbp,%rax 356 adcq %rdx,%r12 357 movq %rbp,%rdx 358 adcq $0,%r10 359 360 shlq $32,%rax 361 shrq $32,%rdx 362 subq %rax,%r13 363 movq 24(%rbx),%rax 364 sbbq %rdx,%rbp 365 366 addq %r10,%r13 367 adcq %rbp,%r8 368 adcq $0,%r9 369 370 371 movq %rax,%rcx 372 mulq 0(%rsi) 373 addq %rax,%r11 374 movq %rcx,%rax 375 adcq $0,%rdx 376 movq %rdx,%rbp 377 378 mulq 8(%rsi) 379 addq %rbp,%r12 380 adcq $0,%rdx 381 addq %rax,%r12 382 movq %rcx,%rax 383 adcq $0,%rdx 384 movq %rdx,%rbp 385 386 mulq 16(%rsi) 387 addq %rbp,%r13 388 adcq $0,%rdx 389 addq %rax,%r13 390 movq %rcx,%rax 391 adcq $0,%rdx 392 393 movq %r11,%rcx 394 imulq %r15,%r11 395 396 movq %rdx,%rbp 397 mulq 24(%rsi) 398 addq %rbp,%r8 399 adcq $0,%rdx 400 xorq %r10,%r10 401 addq %rax,%r8 402 movq %r11,%rax 403 adcq %rdx,%r9 404 adcq $0,%r10 405 406 407 mulq 0(%r14) 408 movq %r11,%rbp 409 addq %rax,%rcx 410 movq %r11,%rax 411 adcq %rdx,%rcx 412 413 subq %r11,%r13 414 sbbq $0,%r11 415 416 mulq 8(%r14) 417 addq %rcx,%r12 418 adcq $0,%rdx 419 addq %rax,%r12 420 movq %rbp,%rax 421 adcq %rdx,%r13 422 movq %rbp,%rdx 423 adcq $0,%r11 424 425 shlq $32,%rax 426 shrq $32,%rdx 427 subq %rax,%r8 428 sbbq %rdx,%rbp 429 430 addq %r11,%r8 431 adcq %rbp,%r9 432 adcq $0,%r10 433 434 435 movq %r12,%rsi 436 subq 0(%r14),%r12 437 movq %r13,%r11 438 sbbq 8(%r14),%r13 439 movq %r8,%rcx 440 sbbq 16(%r14),%r8 441 movq %r9,%rbp 442 sbbq 24(%r14),%r9 443 sbbq $0,%r10 444 445 cmovcq %rsi,%r12 446 cmovcq %r11,%r13 447 cmovcq %rcx,%r8 448 cmovcq %rbp,%r9 449 450 movq %r12,0(%rdi) 451 movq %r13,8(%rdi) 452 movq %r8,16(%rdi) 453 movq %r9,24(%rdi) 454 455 movq 0(%rsp),%r15 456.cfi_restore %r15 457 movq 8(%rsp),%r14 458.cfi_restore %r14 459 movq 16(%rsp),%r13 460.cfi_restore %r13 461 movq 24(%rsp),%r12 462.cfi_restore %r12 463 movq 32(%rsp),%rbx 464.cfi_restore %rbx 465 movq 40(%rsp),%rbp 466.cfi_restore %rbp 467 leaq 48(%rsp),%rsp 468.cfi_adjust_cfa_offset -48 469.Lord_mul_epilogue: 470 .byte 0xf3,0xc3 471.cfi_endproc 472.size GFp_p256_scalar_mul_mont,.-GFp_p256_scalar_mul_mont 473 474 475 476 477 478 479 480.globl GFp_p256_scalar_sqr_rep_mont 481.hidden GFp_p256_scalar_sqr_rep_mont 482.type GFp_p256_scalar_sqr_rep_mont,@function 483.align 32 484GFp_p256_scalar_sqr_rep_mont: 485.cfi_startproc 486 leaq GFp_ia32cap_P(%rip),%rcx 487 movq 8(%rcx),%rcx 488 andl $0x80100,%ecx 489 cmpl $0x80100,%ecx 490 je .Lecp_nistz256_ord_sqr_montx 491 pushq %rbp 492.cfi_adjust_cfa_offset 8 493.cfi_offset %rbp,-16 494 pushq %rbx 495.cfi_adjust_cfa_offset 8 496.cfi_offset %rbx,-24 497 pushq %r12 498.cfi_adjust_cfa_offset 8 499.cfi_offset %r12,-32 500 pushq %r13 501.cfi_adjust_cfa_offset 8 502.cfi_offset %r13,-40 503 pushq %r14 504.cfi_adjust_cfa_offset 8 505.cfi_offset %r14,-48 506 pushq %r15 507.cfi_adjust_cfa_offset 8 508.cfi_offset %r15,-56 509.Lord_sqr_body: 510 511 movq 0(%rsi),%r8 512 movq 8(%rsi),%rax 513 movq 16(%rsi),%r14 514 movq 24(%rsi),%r15 515 leaq .Lord(%rip),%rsi 516 movq %rdx,%rbx 517 jmp .Loop_ord_sqr 518 519.align 32 520.Loop_ord_sqr: 521 522 movq %rax,%rbp 523 mulq %r8 524 movq %rax,%r9 525.byte 102,72,15,110,205 526 movq %r14,%rax 527 movq %rdx,%r10 528 529 mulq %r8 530 addq %rax,%r10 531 movq %r15,%rax 532.byte 102,73,15,110,214 533 adcq $0,%rdx 534 movq %rdx,%r11 535 536 mulq %r8 537 addq %rax,%r11 538 movq %r15,%rax 539.byte 102,73,15,110,223 540 adcq $0,%rdx 541 movq %rdx,%r12 542 543 544 mulq %r14 545 movq %rax,%r13 546 movq %r14,%rax 547 movq %rdx,%r14 548 549 550 mulq %rbp 551 addq %rax,%r11 552 movq %r15,%rax 553 adcq $0,%rdx 554 movq %rdx,%r15 555 556 mulq %rbp 557 addq %rax,%r12 558 adcq $0,%rdx 559 560 addq %r15,%r12 561 adcq %rdx,%r13 562 adcq $0,%r14 563 564 565 xorq %r15,%r15 566 movq %r8,%rax 567 addq %r9,%r9 568 adcq %r10,%r10 569 adcq %r11,%r11 570 adcq %r12,%r12 571 adcq %r13,%r13 572 adcq %r14,%r14 573 adcq $0,%r15 574 575 576 mulq %rax 577 movq %rax,%r8 578.byte 102,72,15,126,200 579 movq %rdx,%rbp 580 581 mulq %rax 582 addq %rbp,%r9 583 adcq %rax,%r10 584.byte 102,72,15,126,208 585 adcq $0,%rdx 586 movq %rdx,%rbp 587 588 mulq %rax 589 addq %rbp,%r11 590 adcq %rax,%r12 591.byte 102,72,15,126,216 592 adcq $0,%rdx 593 movq %rdx,%rbp 594 595 movq %r8,%rcx 596 imulq 32(%rsi),%r8 597 598 mulq %rax 599 addq %rbp,%r13 600 adcq %rax,%r14 601 movq 0(%rsi),%rax 602 adcq %rdx,%r15 603 604 605 mulq %r8 606 movq %r8,%rbp 607 addq %rax,%rcx 608 movq 8(%rsi),%rax 609 adcq %rdx,%rcx 610 611 subq %r8,%r10 612 sbbq $0,%rbp 613 614 mulq %r8 615 addq %rcx,%r9 616 adcq $0,%rdx 617 addq %rax,%r9 618 movq %r8,%rax 619 adcq %rdx,%r10 620 movq %r8,%rdx 621 adcq $0,%rbp 622 623 movq %r9,%rcx 624 imulq 32(%rsi),%r9 625 626 shlq $32,%rax 627 shrq $32,%rdx 628 subq %rax,%r11 629 movq 0(%rsi),%rax 630 sbbq %rdx,%r8 631 632 addq %rbp,%r11 633 adcq $0,%r8 634 635 636 mulq %r9 637 movq %r9,%rbp 638 addq %rax,%rcx 639 movq 8(%rsi),%rax 640 adcq %rdx,%rcx 641 642 subq %r9,%r11 643 sbbq $0,%rbp 644 645 mulq %r9 646 addq %rcx,%r10 647 adcq $0,%rdx 648 addq %rax,%r10 649 movq %r9,%rax 650 adcq %rdx,%r11 651 movq %r9,%rdx 652 adcq $0,%rbp 653 654 movq %r10,%rcx 655 imulq 32(%rsi),%r10 656 657 shlq $32,%rax 658 shrq $32,%rdx 659 subq %rax,%r8 660 movq 0(%rsi),%rax 661 sbbq %rdx,%r9 662 663 addq %rbp,%r8 664 adcq $0,%r9 665 666 667 mulq %r10 668 movq %r10,%rbp 669 addq %rax,%rcx 670 movq 8(%rsi),%rax 671 adcq %rdx,%rcx 672 673 subq %r10,%r8 674 sbbq $0,%rbp 675 676 mulq %r10 677 addq %rcx,%r11 678 adcq $0,%rdx 679 addq %rax,%r11 680 movq %r10,%rax 681 adcq %rdx,%r8 682 movq %r10,%rdx 683 adcq $0,%rbp 684 685 movq %r11,%rcx 686 imulq 32(%rsi),%r11 687 688 shlq $32,%rax 689 shrq $32,%rdx 690 subq %rax,%r9 691 movq 0(%rsi),%rax 692 sbbq %rdx,%r10 693 694 addq %rbp,%r9 695 adcq $0,%r10 696 697 698 mulq %r11 699 movq %r11,%rbp 700 addq %rax,%rcx 701 movq 8(%rsi),%rax 702 adcq %rdx,%rcx 703 704 subq %r11,%r9 705 sbbq $0,%rbp 706 707 mulq %r11 708 addq %rcx,%r8 709 adcq $0,%rdx 710 addq %rax,%r8 711 movq %r11,%rax 712 adcq %rdx,%r9 713 movq %r11,%rdx 714 adcq $0,%rbp 715 716 shlq $32,%rax 717 shrq $32,%rdx 718 subq %rax,%r10 719 sbbq %rdx,%r11 720 721 addq %rbp,%r10 722 adcq $0,%r11 723 724 725 xorq %rdx,%rdx 726 addq %r12,%r8 727 adcq %r13,%r9 728 movq %r8,%r12 729 adcq %r14,%r10 730 adcq %r15,%r11 731 movq %r9,%rax 732 adcq $0,%rdx 733 734 735 subq 0(%rsi),%r8 736 movq %r10,%r14 737 sbbq 8(%rsi),%r9 738 sbbq 16(%rsi),%r10 739 movq %r11,%r15 740 sbbq 24(%rsi),%r11 741 sbbq $0,%rdx 742 743 cmovcq %r12,%r8 744 cmovncq %r9,%rax 745 cmovncq %r10,%r14 746 cmovncq %r11,%r15 747 748 decq %rbx 749 jnz .Loop_ord_sqr 750 751 movq %r8,0(%rdi) 752 movq %rax,8(%rdi) 753 pxor %xmm1,%xmm1 754 movq %r14,16(%rdi) 755 pxor %xmm2,%xmm2 756 movq %r15,24(%rdi) 757 pxor %xmm3,%xmm3 758 759 movq 0(%rsp),%r15 760.cfi_restore %r15 761 movq 8(%rsp),%r14 762.cfi_restore %r14 763 movq 16(%rsp),%r13 764.cfi_restore %r13 765 movq 24(%rsp),%r12 766.cfi_restore %r12 767 movq 32(%rsp),%rbx 768.cfi_restore %rbx 769 movq 40(%rsp),%rbp 770.cfi_restore %rbp 771 leaq 48(%rsp),%rsp 772.cfi_adjust_cfa_offset -48 773.Lord_sqr_epilogue: 774 .byte 0xf3,0xc3 775.cfi_endproc 776.size GFp_p256_scalar_sqr_rep_mont,.-GFp_p256_scalar_sqr_rep_mont 777 778.type ecp_nistz256_ord_mul_montx,@function 779.align 32 780ecp_nistz256_ord_mul_montx: 781.cfi_startproc 782.Lecp_nistz256_ord_mul_montx: 783 pushq %rbp 784.cfi_adjust_cfa_offset 8 785.cfi_offset %rbp,-16 786 pushq %rbx 787.cfi_adjust_cfa_offset 8 788.cfi_offset %rbx,-24 789 pushq %r12 790.cfi_adjust_cfa_offset 8 791.cfi_offset %r12,-32 792 pushq %r13 793.cfi_adjust_cfa_offset 8 794.cfi_offset %r13,-40 795 pushq %r14 796.cfi_adjust_cfa_offset 8 797.cfi_offset %r14,-48 798 pushq %r15 799.cfi_adjust_cfa_offset 8 800.cfi_offset %r15,-56 801.Lord_mulx_body: 802 803 movq %rdx,%rbx 804 movq 0(%rdx),%rdx 805 movq 0(%rsi),%r9 806 movq 8(%rsi),%r10 807 movq 16(%rsi),%r11 808 movq 24(%rsi),%r12 809 leaq -128(%rsi),%rsi 810 leaq .Lord-128(%rip),%r14 811 movq .LordK(%rip),%r15 812 813 814 mulxq %r9,%r8,%r9 815 mulxq %r10,%rcx,%r10 816 mulxq %r11,%rbp,%r11 817 addq %rcx,%r9 818 mulxq %r12,%rcx,%r12 819 movq %r8,%rdx 820 mulxq %r15,%rdx,%rax 821 adcq %rbp,%r10 822 adcq %rcx,%r11 823 adcq $0,%r12 824 825 826 xorq %r13,%r13 827 mulxq 0+128(%r14),%rcx,%rbp 828 adcxq %rcx,%r8 829 adoxq %rbp,%r9 830 831 mulxq 8+128(%r14),%rcx,%rbp 832 adcxq %rcx,%r9 833 adoxq %rbp,%r10 834 835 mulxq 16+128(%r14),%rcx,%rbp 836 adcxq %rcx,%r10 837 adoxq %rbp,%r11 838 839 mulxq 24+128(%r14),%rcx,%rbp 840 movq 8(%rbx),%rdx 841 adcxq %rcx,%r11 842 adoxq %rbp,%r12 843 adcxq %r8,%r12 844 adoxq %r8,%r13 845 adcq $0,%r13 846 847 848 mulxq 0+128(%rsi),%rcx,%rbp 849 adcxq %rcx,%r9 850 adoxq %rbp,%r10 851 852 mulxq 8+128(%rsi),%rcx,%rbp 853 adcxq %rcx,%r10 854 adoxq %rbp,%r11 855 856 mulxq 16+128(%rsi),%rcx,%rbp 857 adcxq %rcx,%r11 858 adoxq %rbp,%r12 859 860 mulxq 24+128(%rsi),%rcx,%rbp 861 movq %r9,%rdx 862 mulxq %r15,%rdx,%rax 863 adcxq %rcx,%r12 864 adoxq %rbp,%r13 865 866 adcxq %r8,%r13 867 adoxq %r8,%r8 868 adcq $0,%r8 869 870 871 mulxq 0+128(%r14),%rcx,%rbp 872 adcxq %rcx,%r9 873 adoxq %rbp,%r10 874 875 mulxq 8+128(%r14),%rcx,%rbp 876 adcxq %rcx,%r10 877 adoxq %rbp,%r11 878 879 mulxq 16+128(%r14),%rcx,%rbp 880 adcxq %rcx,%r11 881 adoxq %rbp,%r12 882 883 mulxq 24+128(%r14),%rcx,%rbp 884 movq 16(%rbx),%rdx 885 adcxq %rcx,%r12 886 adoxq %rbp,%r13 887 adcxq %r9,%r13 888 adoxq %r9,%r8 889 adcq $0,%r8 890 891 892 mulxq 0+128(%rsi),%rcx,%rbp 893 adcxq %rcx,%r10 894 adoxq %rbp,%r11 895 896 mulxq 8+128(%rsi),%rcx,%rbp 897 adcxq %rcx,%r11 898 adoxq %rbp,%r12 899 900 mulxq 16+128(%rsi),%rcx,%rbp 901 adcxq %rcx,%r12 902 adoxq %rbp,%r13 903 904 mulxq 24+128(%rsi),%rcx,%rbp 905 movq %r10,%rdx 906 mulxq %r15,%rdx,%rax 907 adcxq %rcx,%r13 908 adoxq %rbp,%r8 909 910 adcxq %r9,%r8 911 adoxq %r9,%r9 912 adcq $0,%r9 913 914 915 mulxq 0+128(%r14),%rcx,%rbp 916 adcxq %rcx,%r10 917 adoxq %rbp,%r11 918 919 mulxq 8+128(%r14),%rcx,%rbp 920 adcxq %rcx,%r11 921 adoxq %rbp,%r12 922 923 mulxq 16+128(%r14),%rcx,%rbp 924 adcxq %rcx,%r12 925 adoxq %rbp,%r13 926 927 mulxq 24+128(%r14),%rcx,%rbp 928 movq 24(%rbx),%rdx 929 adcxq %rcx,%r13 930 adoxq %rbp,%r8 931 adcxq %r10,%r8 932 adoxq %r10,%r9 933 adcq $0,%r9 934 935 936 mulxq 0+128(%rsi),%rcx,%rbp 937 adcxq %rcx,%r11 938 adoxq %rbp,%r12 939 940 mulxq 8+128(%rsi),%rcx,%rbp 941 adcxq %rcx,%r12 942 adoxq %rbp,%r13 943 944 mulxq 16+128(%rsi),%rcx,%rbp 945 adcxq %rcx,%r13 946 adoxq %rbp,%r8 947 948 mulxq 24+128(%rsi),%rcx,%rbp 949 movq %r11,%rdx 950 mulxq %r15,%rdx,%rax 951 adcxq %rcx,%r8 952 adoxq %rbp,%r9 953 954 adcxq %r10,%r9 955 adoxq %r10,%r10 956 adcq $0,%r10 957 958 959 mulxq 0+128(%r14),%rcx,%rbp 960 adcxq %rcx,%r11 961 adoxq %rbp,%r12 962 963 mulxq 8+128(%r14),%rcx,%rbp 964 adcxq %rcx,%r12 965 adoxq %rbp,%r13 966 967 mulxq 16+128(%r14),%rcx,%rbp 968 adcxq %rcx,%r13 969 adoxq %rbp,%r8 970 971 mulxq 24+128(%r14),%rcx,%rbp 972 leaq 128(%r14),%r14 973 movq %r12,%rbx 974 adcxq %rcx,%r8 975 adoxq %rbp,%r9 976 movq %r13,%rdx 977 adcxq %r11,%r9 978 adoxq %r11,%r10 979 adcq $0,%r10 980 981 982 983 movq %r8,%rcx 984 subq 0(%r14),%r12 985 sbbq 8(%r14),%r13 986 sbbq 16(%r14),%r8 987 movq %r9,%rbp 988 sbbq 24(%r14),%r9 989 sbbq $0,%r10 990 991 cmovcq %rbx,%r12 992 cmovcq %rdx,%r13 993 cmovcq %rcx,%r8 994 cmovcq %rbp,%r9 995 996 movq %r12,0(%rdi) 997 movq %r13,8(%rdi) 998 movq %r8,16(%rdi) 999 movq %r9,24(%rdi) 1000 1001 movq 0(%rsp),%r15 1002.cfi_restore %r15 1003 movq 8(%rsp),%r14 1004.cfi_restore %r14 1005 movq 16(%rsp),%r13 1006.cfi_restore %r13 1007 movq 24(%rsp),%r12 1008.cfi_restore %r12 1009 movq 32(%rsp),%rbx 1010.cfi_restore %rbx 1011 movq 40(%rsp),%rbp 1012.cfi_restore %rbp 1013 leaq 48(%rsp),%rsp 1014.cfi_adjust_cfa_offset -48 1015.Lord_mulx_epilogue: 1016 .byte 0xf3,0xc3 1017.cfi_endproc 1018.size ecp_nistz256_ord_mul_montx,.-ecp_nistz256_ord_mul_montx 1019 1020.type ecp_nistz256_ord_sqr_montx,@function 1021.align 32 1022ecp_nistz256_ord_sqr_montx: 1023.cfi_startproc 1024.Lecp_nistz256_ord_sqr_montx: 1025 pushq %rbp 1026.cfi_adjust_cfa_offset 8 1027.cfi_offset %rbp,-16 1028 pushq %rbx 1029.cfi_adjust_cfa_offset 8 1030.cfi_offset %rbx,-24 1031 pushq %r12 1032.cfi_adjust_cfa_offset 8 1033.cfi_offset %r12,-32 1034 pushq %r13 1035.cfi_adjust_cfa_offset 8 1036.cfi_offset %r13,-40 1037 pushq %r14 1038.cfi_adjust_cfa_offset 8 1039.cfi_offset %r14,-48 1040 pushq %r15 1041.cfi_adjust_cfa_offset 8 1042.cfi_offset %r15,-56 1043.Lord_sqrx_body: 1044 1045 movq %rdx,%rbx 1046 movq 0(%rsi),%rdx 1047 movq 8(%rsi),%r14 1048 movq 16(%rsi),%r15 1049 movq 24(%rsi),%r8 1050 leaq .Lord(%rip),%rsi 1051 jmp .Loop_ord_sqrx 1052 1053.align 32 1054.Loop_ord_sqrx: 1055 mulxq %r14,%r9,%r10 1056 mulxq %r15,%rcx,%r11 1057 movq %rdx,%rax 1058.byte 102,73,15,110,206 1059 mulxq %r8,%rbp,%r12 1060 movq %r14,%rdx 1061 addq %rcx,%r10 1062.byte 102,73,15,110,215 1063 adcq %rbp,%r11 1064 adcq $0,%r12 1065 xorq %r13,%r13 1066 1067 mulxq %r15,%rcx,%rbp 1068 adcxq %rcx,%r11 1069 adoxq %rbp,%r12 1070 1071 mulxq %r8,%rcx,%rbp 1072 movq %r15,%rdx 1073 adcxq %rcx,%r12 1074 adoxq %rbp,%r13 1075 adcq $0,%r13 1076 1077 mulxq %r8,%rcx,%r14 1078 movq %rax,%rdx 1079.byte 102,73,15,110,216 1080 xorq %r15,%r15 1081 adcxq %r9,%r9 1082 adoxq %rcx,%r13 1083 adcxq %r10,%r10 1084 adoxq %r15,%r14 1085 1086 1087 mulxq %rdx,%r8,%rbp 1088.byte 102,72,15,126,202 1089 adcxq %r11,%r11 1090 adoxq %rbp,%r9 1091 adcxq %r12,%r12 1092 mulxq %rdx,%rcx,%rax 1093.byte 102,72,15,126,210 1094 adcxq %r13,%r13 1095 adoxq %rcx,%r10 1096 adcxq %r14,%r14 1097 mulxq %rdx,%rcx,%rbp 1098.byte 0x67 1099.byte 102,72,15,126,218 1100 adoxq %rax,%r11 1101 adcxq %r15,%r15 1102 adoxq %rcx,%r12 1103 adoxq %rbp,%r13 1104 mulxq %rdx,%rcx,%rax 1105 adoxq %rcx,%r14 1106 adoxq %rax,%r15 1107 1108 1109 movq %r8,%rdx 1110 mulxq 32(%rsi),%rdx,%rcx 1111 1112 xorq %rax,%rax 1113 mulxq 0(%rsi),%rcx,%rbp 1114 adcxq %rcx,%r8 1115 adoxq %rbp,%r9 1116 mulxq 8(%rsi),%rcx,%rbp 1117 adcxq %rcx,%r9 1118 adoxq %rbp,%r10 1119 mulxq 16(%rsi),%rcx,%rbp 1120 adcxq %rcx,%r10 1121 adoxq %rbp,%r11 1122 mulxq 24(%rsi),%rcx,%rbp 1123 adcxq %rcx,%r11 1124 adoxq %rbp,%r8 1125 adcxq %rax,%r8 1126 1127 1128 movq %r9,%rdx 1129 mulxq 32(%rsi),%rdx,%rcx 1130 1131 mulxq 0(%rsi),%rcx,%rbp 1132 adoxq %rcx,%r9 1133 adcxq %rbp,%r10 1134 mulxq 8(%rsi),%rcx,%rbp 1135 adoxq %rcx,%r10 1136 adcxq %rbp,%r11 1137 mulxq 16(%rsi),%rcx,%rbp 1138 adoxq %rcx,%r11 1139 adcxq %rbp,%r8 1140 mulxq 24(%rsi),%rcx,%rbp 1141 adoxq %rcx,%r8 1142 adcxq %rbp,%r9 1143 adoxq %rax,%r9 1144 1145 1146 movq %r10,%rdx 1147 mulxq 32(%rsi),%rdx,%rcx 1148 1149 mulxq 0(%rsi),%rcx,%rbp 1150 adcxq %rcx,%r10 1151 adoxq %rbp,%r11 1152 mulxq 8(%rsi),%rcx,%rbp 1153 adcxq %rcx,%r11 1154 adoxq %rbp,%r8 1155 mulxq 16(%rsi),%rcx,%rbp 1156 adcxq %rcx,%r8 1157 adoxq %rbp,%r9 1158 mulxq 24(%rsi),%rcx,%rbp 1159 adcxq %rcx,%r9 1160 adoxq %rbp,%r10 1161 adcxq %rax,%r10 1162 1163 1164 movq %r11,%rdx 1165 mulxq 32(%rsi),%rdx,%rcx 1166 1167 mulxq 0(%rsi),%rcx,%rbp 1168 adoxq %rcx,%r11 1169 adcxq %rbp,%r8 1170 mulxq 8(%rsi),%rcx,%rbp 1171 adoxq %rcx,%r8 1172 adcxq %rbp,%r9 1173 mulxq 16(%rsi),%rcx,%rbp 1174 adoxq %rcx,%r9 1175 adcxq %rbp,%r10 1176 mulxq 24(%rsi),%rcx,%rbp 1177 adoxq %rcx,%r10 1178 adcxq %rbp,%r11 1179 adoxq %rax,%r11 1180 1181 1182 addq %r8,%r12 1183 adcq %r13,%r9 1184 movq %r12,%rdx 1185 adcq %r14,%r10 1186 adcq %r15,%r11 1187 movq %r9,%r14 1188 adcq $0,%rax 1189 1190 1191 subq 0(%rsi),%r12 1192 movq %r10,%r15 1193 sbbq 8(%rsi),%r9 1194 sbbq 16(%rsi),%r10 1195 movq %r11,%r8 1196 sbbq 24(%rsi),%r11 1197 sbbq $0,%rax 1198 1199 cmovncq %r12,%rdx 1200 cmovncq %r9,%r14 1201 cmovncq %r10,%r15 1202 cmovncq %r11,%r8 1203 1204 decq %rbx 1205 jnz .Loop_ord_sqrx 1206 1207 movq %rdx,0(%rdi) 1208 movq %r14,8(%rdi) 1209 pxor %xmm1,%xmm1 1210 movq %r15,16(%rdi) 1211 pxor %xmm2,%xmm2 1212 movq %r8,24(%rdi) 1213 pxor %xmm3,%xmm3 1214 1215 movq 0(%rsp),%r15 1216.cfi_restore %r15 1217 movq 8(%rsp),%r14 1218.cfi_restore %r14 1219 movq 16(%rsp),%r13 1220.cfi_restore %r13 1221 movq 24(%rsp),%r12 1222.cfi_restore %r12 1223 movq 32(%rsp),%rbx 1224.cfi_restore %rbx 1225 movq 40(%rsp),%rbp 1226.cfi_restore %rbp 1227 leaq 48(%rsp),%rsp 1228.cfi_adjust_cfa_offset -48 1229.Lord_sqrx_epilogue: 1230 .byte 0xf3,0xc3 1231.cfi_endproc 1232.size ecp_nistz256_ord_sqr_montx,.-ecp_nistz256_ord_sqr_montx 1233 1234 1235 1236 1237 1238 1239.globl GFp_nistz256_mul_mont 1240.hidden GFp_nistz256_mul_mont 1241.type GFp_nistz256_mul_mont,@function 1242.align 32 1243GFp_nistz256_mul_mont: 1244.cfi_startproc 1245 leaq GFp_ia32cap_P(%rip),%rcx 1246 movq 8(%rcx),%rcx 1247 andl $0x80100,%ecx 1248.Lmul_mont: 1249 pushq %rbp 1250.cfi_adjust_cfa_offset 8 1251.cfi_offset %rbp,-16 1252 pushq %rbx 1253.cfi_adjust_cfa_offset 8 1254.cfi_offset %rbx,-24 1255 pushq %r12 1256.cfi_adjust_cfa_offset 8 1257.cfi_offset %r12,-32 1258 pushq %r13 1259.cfi_adjust_cfa_offset 8 1260.cfi_offset %r13,-40 1261 pushq %r14 1262.cfi_adjust_cfa_offset 8 1263.cfi_offset %r14,-48 1264 pushq %r15 1265.cfi_adjust_cfa_offset 8 1266.cfi_offset %r15,-56 1267.Lmul_body: 1268 cmpl $0x80100,%ecx 1269 je .Lmul_montx 1270 movq %rdx,%rbx 1271 movq 0(%rdx),%rax 1272 movq 0(%rsi),%r9 1273 movq 8(%rsi),%r10 1274 movq 16(%rsi),%r11 1275 movq 24(%rsi),%r12 1276 1277 call __ecp_nistz256_mul_montq 1278 jmp .Lmul_mont_done 1279 1280.align 32 1281.Lmul_montx: 1282 movq %rdx,%rbx 1283 movq 0(%rdx),%rdx 1284 movq 0(%rsi),%r9 1285 movq 8(%rsi),%r10 1286 movq 16(%rsi),%r11 1287 movq 24(%rsi),%r12 1288 leaq -128(%rsi),%rsi 1289 1290 call __ecp_nistz256_mul_montx 1291.Lmul_mont_done: 1292 movq 0(%rsp),%r15 1293.cfi_restore %r15 1294 movq 8(%rsp),%r14 1295.cfi_restore %r14 1296 movq 16(%rsp),%r13 1297.cfi_restore %r13 1298 movq 24(%rsp),%r12 1299.cfi_restore %r12 1300 movq 32(%rsp),%rbx 1301.cfi_restore %rbx 1302 movq 40(%rsp),%rbp 1303.cfi_restore %rbp 1304 leaq 48(%rsp),%rsp 1305.cfi_adjust_cfa_offset -48 1306.Lmul_epilogue: 1307 .byte 0xf3,0xc3 1308.cfi_endproc 1309.size GFp_nistz256_mul_mont,.-GFp_nistz256_mul_mont 1310 1311.type __ecp_nistz256_mul_montq,@function 1312.align 32 1313__ecp_nistz256_mul_montq: 1314.cfi_startproc 1315 1316 1317 movq %rax,%rbp 1318 mulq %r9 1319 movq .Lpoly+8(%rip),%r14 1320 movq %rax,%r8 1321 movq %rbp,%rax 1322 movq %rdx,%r9 1323 1324 mulq %r10 1325 movq .Lpoly+24(%rip),%r15 1326 addq %rax,%r9 1327 movq %rbp,%rax 1328 adcq $0,%rdx 1329 movq %rdx,%r10 1330 1331 mulq %r11 1332 addq %rax,%r10 1333 movq %rbp,%rax 1334 adcq $0,%rdx 1335 movq %rdx,%r11 1336 1337 mulq %r12 1338 addq %rax,%r11 1339 movq %r8,%rax 1340 adcq $0,%rdx 1341 xorq %r13,%r13 1342 movq %rdx,%r12 1343 1344 1345 1346 1347 1348 1349 1350 1351 1352 1353 movq %r8,%rbp 1354 shlq $32,%r8 1355 mulq %r15 1356 shrq $32,%rbp 1357 addq %r8,%r9 1358 adcq %rbp,%r10 1359 adcq %rax,%r11 1360 movq 8(%rbx),%rax 1361 adcq %rdx,%r12 1362 adcq $0,%r13 1363 xorq %r8,%r8 1364 1365 1366 1367 movq %rax,%rbp 1368 mulq 0(%rsi) 1369 addq %rax,%r9 1370 movq %rbp,%rax 1371 adcq $0,%rdx 1372 movq %rdx,%rcx 1373 1374 mulq 8(%rsi) 1375 addq %rcx,%r10 1376 adcq $0,%rdx 1377 addq %rax,%r10 1378 movq %rbp,%rax 1379 adcq $0,%rdx 1380 movq %rdx,%rcx 1381 1382 mulq 16(%rsi) 1383 addq %rcx,%r11 1384 adcq $0,%rdx 1385 addq %rax,%r11 1386 movq %rbp,%rax 1387 adcq $0,%rdx 1388 movq %rdx,%rcx 1389 1390 mulq 24(%rsi) 1391 addq %rcx,%r12 1392 adcq $0,%rdx 1393 addq %rax,%r12 1394 movq %r9,%rax 1395 adcq %rdx,%r13 1396 adcq $0,%r8 1397 1398 1399 1400 movq %r9,%rbp 1401 shlq $32,%r9 1402 mulq %r15 1403 shrq $32,%rbp 1404 addq %r9,%r10 1405 adcq %rbp,%r11 1406 adcq %rax,%r12 1407 movq 16(%rbx),%rax 1408 adcq %rdx,%r13 1409 adcq $0,%r8 1410 xorq %r9,%r9 1411 1412 1413 1414 movq %rax,%rbp 1415 mulq 0(%rsi) 1416 addq %rax,%r10 1417 movq %rbp,%rax 1418 adcq $0,%rdx 1419 movq %rdx,%rcx 1420 1421 mulq 8(%rsi) 1422 addq %rcx,%r11 1423 adcq $0,%rdx 1424 addq %rax,%r11 1425 movq %rbp,%rax 1426 adcq $0,%rdx 1427 movq %rdx,%rcx 1428 1429 mulq 16(%rsi) 1430 addq %rcx,%r12 1431 adcq $0,%rdx 1432 addq %rax,%r12 1433 movq %rbp,%rax 1434 adcq $0,%rdx 1435 movq %rdx,%rcx 1436 1437 mulq 24(%rsi) 1438 addq %rcx,%r13 1439 adcq $0,%rdx 1440 addq %rax,%r13 1441 movq %r10,%rax 1442 adcq %rdx,%r8 1443 adcq $0,%r9 1444 1445 1446 1447 movq %r10,%rbp 1448 shlq $32,%r10 1449 mulq %r15 1450 shrq $32,%rbp 1451 addq %r10,%r11 1452 adcq %rbp,%r12 1453 adcq %rax,%r13 1454 movq 24(%rbx),%rax 1455 adcq %rdx,%r8 1456 adcq $0,%r9 1457 xorq %r10,%r10 1458 1459 1460 1461 movq %rax,%rbp 1462 mulq 0(%rsi) 1463 addq %rax,%r11 1464 movq %rbp,%rax 1465 adcq $0,%rdx 1466 movq %rdx,%rcx 1467 1468 mulq 8(%rsi) 1469 addq %rcx,%r12 1470 adcq $0,%rdx 1471 addq %rax,%r12 1472 movq %rbp,%rax 1473 adcq $0,%rdx 1474 movq %rdx,%rcx 1475 1476 mulq 16(%rsi) 1477 addq %rcx,%r13 1478 adcq $0,%rdx 1479 addq %rax,%r13 1480 movq %rbp,%rax 1481 adcq $0,%rdx 1482 movq %rdx,%rcx 1483 1484 mulq 24(%rsi) 1485 addq %rcx,%r8 1486 adcq $0,%rdx 1487 addq %rax,%r8 1488 movq %r11,%rax 1489 adcq %rdx,%r9 1490 adcq $0,%r10 1491 1492 1493 1494 movq %r11,%rbp 1495 shlq $32,%r11 1496 mulq %r15 1497 shrq $32,%rbp 1498 addq %r11,%r12 1499 adcq %rbp,%r13 1500 movq %r12,%rcx 1501 adcq %rax,%r8 1502 adcq %rdx,%r9 1503 movq %r13,%rbp 1504 adcq $0,%r10 1505 1506 1507 1508 subq $-1,%r12 1509 movq %r8,%rbx 1510 sbbq %r14,%r13 1511 sbbq $0,%r8 1512 movq %r9,%rdx 1513 sbbq %r15,%r9 1514 sbbq $0,%r10 1515 1516 cmovcq %rcx,%r12 1517 cmovcq %rbp,%r13 1518 movq %r12,0(%rdi) 1519 cmovcq %rbx,%r8 1520 movq %r13,8(%rdi) 1521 cmovcq %rdx,%r9 1522 movq %r8,16(%rdi) 1523 movq %r9,24(%rdi) 1524 1525 .byte 0xf3,0xc3 1526.cfi_endproc 1527.size __ecp_nistz256_mul_montq,.-__ecp_nistz256_mul_montq 1528 1529 1530 1531 1532 1533 1534 1535 1536.globl GFp_nistz256_sqr_mont 1537.hidden GFp_nistz256_sqr_mont 1538.type GFp_nistz256_sqr_mont,@function 1539.align 32 1540GFp_nistz256_sqr_mont: 1541.cfi_startproc 1542 leaq GFp_ia32cap_P(%rip),%rcx 1543 movq 8(%rcx),%rcx 1544 andl $0x80100,%ecx 1545 pushq %rbp 1546.cfi_adjust_cfa_offset 8 1547.cfi_offset %rbp,-16 1548 pushq %rbx 1549.cfi_adjust_cfa_offset 8 1550.cfi_offset %rbx,-24 1551 pushq %r12 1552.cfi_adjust_cfa_offset 8 1553.cfi_offset %r12,-32 1554 pushq %r13 1555.cfi_adjust_cfa_offset 8 1556.cfi_offset %r13,-40 1557 pushq %r14 1558.cfi_adjust_cfa_offset 8 1559.cfi_offset %r14,-48 1560 pushq %r15 1561.cfi_adjust_cfa_offset 8 1562.cfi_offset %r15,-56 1563.Lsqr_body: 1564 cmpl $0x80100,%ecx 1565 je .Lsqr_montx 1566 movq 0(%rsi),%rax 1567 movq 8(%rsi),%r14 1568 movq 16(%rsi),%r15 1569 movq 24(%rsi),%r8 1570 1571 call __ecp_nistz256_sqr_montq 1572 jmp .Lsqr_mont_done 1573 1574.align 32 1575.Lsqr_montx: 1576 movq 0(%rsi),%rdx 1577 movq 8(%rsi),%r14 1578 movq 16(%rsi),%r15 1579 movq 24(%rsi),%r8 1580 leaq -128(%rsi),%rsi 1581 1582 call __ecp_nistz256_sqr_montx 1583.Lsqr_mont_done: 1584 movq 0(%rsp),%r15 1585.cfi_restore %r15 1586 movq 8(%rsp),%r14 1587.cfi_restore %r14 1588 movq 16(%rsp),%r13 1589.cfi_restore %r13 1590 movq 24(%rsp),%r12 1591.cfi_restore %r12 1592 movq 32(%rsp),%rbx 1593.cfi_restore %rbx 1594 movq 40(%rsp),%rbp 1595.cfi_restore %rbp 1596 leaq 48(%rsp),%rsp 1597.cfi_adjust_cfa_offset -48 1598.Lsqr_epilogue: 1599 .byte 0xf3,0xc3 1600.cfi_endproc 1601.size GFp_nistz256_sqr_mont,.-GFp_nistz256_sqr_mont 1602 1603.type __ecp_nistz256_sqr_montq,@function 1604.align 32 1605__ecp_nistz256_sqr_montq: 1606.cfi_startproc 1607 movq %rax,%r13 1608 mulq %r14 1609 movq %rax,%r9 1610 movq %r15,%rax 1611 movq %rdx,%r10 1612 1613 mulq %r13 1614 addq %rax,%r10 1615 movq %r8,%rax 1616 adcq $0,%rdx 1617 movq %rdx,%r11 1618 1619 mulq %r13 1620 addq %rax,%r11 1621 movq %r15,%rax 1622 adcq $0,%rdx 1623 movq %rdx,%r12 1624 1625 1626 mulq %r14 1627 addq %rax,%r11 1628 movq %r8,%rax 1629 adcq $0,%rdx 1630 movq %rdx,%rbp 1631 1632 mulq %r14 1633 addq %rax,%r12 1634 movq %r8,%rax 1635 adcq $0,%rdx 1636 addq %rbp,%r12 1637 movq %rdx,%r13 1638 adcq $0,%r13 1639 1640 1641 mulq %r15 1642 xorq %r15,%r15 1643 addq %rax,%r13 1644 movq 0(%rsi),%rax 1645 movq %rdx,%r14 1646 adcq $0,%r14 1647 1648 addq %r9,%r9 1649 adcq %r10,%r10 1650 adcq %r11,%r11 1651 adcq %r12,%r12 1652 adcq %r13,%r13 1653 adcq %r14,%r14 1654 adcq $0,%r15 1655 1656 mulq %rax 1657 movq %rax,%r8 1658 movq 8(%rsi),%rax 1659 movq %rdx,%rcx 1660 1661 mulq %rax 1662 addq %rcx,%r9 1663 adcq %rax,%r10 1664 movq 16(%rsi),%rax 1665 adcq $0,%rdx 1666 movq %rdx,%rcx 1667 1668 mulq %rax 1669 addq %rcx,%r11 1670 adcq %rax,%r12 1671 movq 24(%rsi),%rax 1672 adcq $0,%rdx 1673 movq %rdx,%rcx 1674 1675 mulq %rax 1676 addq %rcx,%r13 1677 adcq %rax,%r14 1678 movq %r8,%rax 1679 adcq %rdx,%r15 1680 1681 movq .Lpoly+8(%rip),%rsi 1682 movq .Lpoly+24(%rip),%rbp 1683 1684 1685 1686 1687 movq %r8,%rcx 1688 shlq $32,%r8 1689 mulq %rbp 1690 shrq $32,%rcx 1691 addq %r8,%r9 1692 adcq %rcx,%r10 1693 adcq %rax,%r11 1694 movq %r9,%rax 1695 adcq $0,%rdx 1696 1697 1698 1699 movq %r9,%rcx 1700 shlq $32,%r9 1701 movq %rdx,%r8 1702 mulq %rbp 1703 shrq $32,%rcx 1704 addq %r9,%r10 1705 adcq %rcx,%r11 1706 adcq %rax,%r8 1707 movq %r10,%rax 1708 adcq $0,%rdx 1709 1710 1711 1712 movq %r10,%rcx 1713 shlq $32,%r10 1714 movq %rdx,%r9 1715 mulq %rbp 1716 shrq $32,%rcx 1717 addq %r10,%r11 1718 adcq %rcx,%r8 1719 adcq %rax,%r9 1720 movq %r11,%rax 1721 adcq $0,%rdx 1722 1723 1724 1725 movq %r11,%rcx 1726 shlq $32,%r11 1727 movq %rdx,%r10 1728 mulq %rbp 1729 shrq $32,%rcx 1730 addq %r11,%r8 1731 adcq %rcx,%r9 1732 adcq %rax,%r10 1733 adcq $0,%rdx 1734 xorq %r11,%r11 1735 1736 1737 1738 addq %r8,%r12 1739 adcq %r9,%r13 1740 movq %r12,%r8 1741 adcq %r10,%r14 1742 adcq %rdx,%r15 1743 movq %r13,%r9 1744 adcq $0,%r11 1745 1746 subq $-1,%r12 1747 movq %r14,%r10 1748 sbbq %rsi,%r13 1749 sbbq $0,%r14 1750 movq %r15,%rcx 1751 sbbq %rbp,%r15 1752 sbbq $0,%r11 1753 1754 cmovcq %r8,%r12 1755 cmovcq %r9,%r13 1756 movq %r12,0(%rdi) 1757 cmovcq %r10,%r14 1758 movq %r13,8(%rdi) 1759 cmovcq %rcx,%r15 1760 movq %r14,16(%rdi) 1761 movq %r15,24(%rdi) 1762 1763 .byte 0xf3,0xc3 1764.cfi_endproc 1765.size __ecp_nistz256_sqr_montq,.-__ecp_nistz256_sqr_montq 1766.type __ecp_nistz256_mul_montx,@function 1767.align 32 1768__ecp_nistz256_mul_montx: 1769.cfi_startproc 1770 1771 1772 mulxq %r9,%r8,%r9 1773 mulxq %r10,%rcx,%r10 1774 movq $32,%r14 1775 xorq %r13,%r13 1776 mulxq %r11,%rbp,%r11 1777 movq .Lpoly+24(%rip),%r15 1778 adcq %rcx,%r9 1779 mulxq %r12,%rcx,%r12 1780 movq %r8,%rdx 1781 adcq %rbp,%r10 1782 shlxq %r14,%r8,%rbp 1783 adcq %rcx,%r11 1784 shrxq %r14,%r8,%rcx 1785 adcq $0,%r12 1786 1787 1788 1789 addq %rbp,%r9 1790 adcq %rcx,%r10 1791 1792 mulxq %r15,%rcx,%rbp 1793 movq 8(%rbx),%rdx 1794 adcq %rcx,%r11 1795 adcq %rbp,%r12 1796 adcq $0,%r13 1797 xorq %r8,%r8 1798 1799 1800 1801 mulxq 0+128(%rsi),%rcx,%rbp 1802 adcxq %rcx,%r9 1803 adoxq %rbp,%r10 1804 1805 mulxq 8+128(%rsi),%rcx,%rbp 1806 adcxq %rcx,%r10 1807 adoxq %rbp,%r11 1808 1809 mulxq 16+128(%rsi),%rcx,%rbp 1810 adcxq %rcx,%r11 1811 adoxq %rbp,%r12 1812 1813 mulxq 24+128(%rsi),%rcx,%rbp 1814 movq %r9,%rdx 1815 adcxq %rcx,%r12 1816 shlxq %r14,%r9,%rcx 1817 adoxq %rbp,%r13 1818 shrxq %r14,%r9,%rbp 1819 1820 adcxq %r8,%r13 1821 adoxq %r8,%r8 1822 adcq $0,%r8 1823 1824 1825 1826 addq %rcx,%r10 1827 adcq %rbp,%r11 1828 1829 mulxq %r15,%rcx,%rbp 1830 movq 16(%rbx),%rdx 1831 adcq %rcx,%r12 1832 adcq %rbp,%r13 1833 adcq $0,%r8 1834 xorq %r9,%r9 1835 1836 1837 1838 mulxq 0+128(%rsi),%rcx,%rbp 1839 adcxq %rcx,%r10 1840 adoxq %rbp,%r11 1841 1842 mulxq 8+128(%rsi),%rcx,%rbp 1843 adcxq %rcx,%r11 1844 adoxq %rbp,%r12 1845 1846 mulxq 16+128(%rsi),%rcx,%rbp 1847 adcxq %rcx,%r12 1848 adoxq %rbp,%r13 1849 1850 mulxq 24+128(%rsi),%rcx,%rbp 1851 movq %r10,%rdx 1852 adcxq %rcx,%r13 1853 shlxq %r14,%r10,%rcx 1854 adoxq %rbp,%r8 1855 shrxq %r14,%r10,%rbp 1856 1857 adcxq %r9,%r8 1858 adoxq %r9,%r9 1859 adcq $0,%r9 1860 1861 1862 1863 addq %rcx,%r11 1864 adcq %rbp,%r12 1865 1866 mulxq %r15,%rcx,%rbp 1867 movq 24(%rbx),%rdx 1868 adcq %rcx,%r13 1869 adcq %rbp,%r8 1870 adcq $0,%r9 1871 xorq %r10,%r10 1872 1873 1874 1875 mulxq 0+128(%rsi),%rcx,%rbp 1876 adcxq %rcx,%r11 1877 adoxq %rbp,%r12 1878 1879 mulxq 8+128(%rsi),%rcx,%rbp 1880 adcxq %rcx,%r12 1881 adoxq %rbp,%r13 1882 1883 mulxq 16+128(%rsi),%rcx,%rbp 1884 adcxq %rcx,%r13 1885 adoxq %rbp,%r8 1886 1887 mulxq 24+128(%rsi),%rcx,%rbp 1888 movq %r11,%rdx 1889 adcxq %rcx,%r8 1890 shlxq %r14,%r11,%rcx 1891 adoxq %rbp,%r9 1892 shrxq %r14,%r11,%rbp 1893 1894 adcxq %r10,%r9 1895 adoxq %r10,%r10 1896 adcq $0,%r10 1897 1898 1899 1900 addq %rcx,%r12 1901 adcq %rbp,%r13 1902 1903 mulxq %r15,%rcx,%rbp 1904 movq %r12,%rbx 1905 movq .Lpoly+8(%rip),%r14 1906 adcq %rcx,%r8 1907 movq %r13,%rdx 1908 adcq %rbp,%r9 1909 adcq $0,%r10 1910 1911 1912 1913 xorl %eax,%eax 1914 movq %r8,%rcx 1915 sbbq $-1,%r12 1916 sbbq %r14,%r13 1917 sbbq $0,%r8 1918 movq %r9,%rbp 1919 sbbq %r15,%r9 1920 sbbq $0,%r10 1921 1922 cmovcq %rbx,%r12 1923 cmovcq %rdx,%r13 1924 movq %r12,0(%rdi) 1925 cmovcq %rcx,%r8 1926 movq %r13,8(%rdi) 1927 cmovcq %rbp,%r9 1928 movq %r8,16(%rdi) 1929 movq %r9,24(%rdi) 1930 1931 .byte 0xf3,0xc3 1932.cfi_endproc 1933.size __ecp_nistz256_mul_montx,.-__ecp_nistz256_mul_montx 1934 1935.type __ecp_nistz256_sqr_montx,@function 1936.align 32 1937__ecp_nistz256_sqr_montx: 1938.cfi_startproc 1939 mulxq %r14,%r9,%r10 1940 mulxq %r15,%rcx,%r11 1941 xorl %eax,%eax 1942 adcq %rcx,%r10 1943 mulxq %r8,%rbp,%r12 1944 movq %r14,%rdx 1945 adcq %rbp,%r11 1946 adcq $0,%r12 1947 xorq %r13,%r13 1948 1949 1950 mulxq %r15,%rcx,%rbp 1951 adcxq %rcx,%r11 1952 adoxq %rbp,%r12 1953 1954 mulxq %r8,%rcx,%rbp 1955 movq %r15,%rdx 1956 adcxq %rcx,%r12 1957 adoxq %rbp,%r13 1958 adcq $0,%r13 1959 1960 1961 mulxq %r8,%rcx,%r14 1962 movq 0+128(%rsi),%rdx 1963 xorq %r15,%r15 1964 adcxq %r9,%r9 1965 adoxq %rcx,%r13 1966 adcxq %r10,%r10 1967 adoxq %r15,%r14 1968 1969 mulxq %rdx,%r8,%rbp 1970 movq 8+128(%rsi),%rdx 1971 adcxq %r11,%r11 1972 adoxq %rbp,%r9 1973 adcxq %r12,%r12 1974 mulxq %rdx,%rcx,%rax 1975 movq 16+128(%rsi),%rdx 1976 adcxq %r13,%r13 1977 adoxq %rcx,%r10 1978 adcxq %r14,%r14 1979.byte 0x67 1980 mulxq %rdx,%rcx,%rbp 1981 movq 24+128(%rsi),%rdx 1982 adoxq %rax,%r11 1983 adcxq %r15,%r15 1984 adoxq %rcx,%r12 1985 movq $32,%rsi 1986 adoxq %rbp,%r13 1987.byte 0x67,0x67 1988 mulxq %rdx,%rcx,%rax 1989 movq .Lpoly+24(%rip),%rdx 1990 adoxq %rcx,%r14 1991 shlxq %rsi,%r8,%rcx 1992 adoxq %rax,%r15 1993 shrxq %rsi,%r8,%rax 1994 movq %rdx,%rbp 1995 1996 1997 addq %rcx,%r9 1998 adcq %rax,%r10 1999 2000 mulxq %r8,%rcx,%r8 2001 adcq %rcx,%r11 2002 shlxq %rsi,%r9,%rcx 2003 adcq $0,%r8 2004 shrxq %rsi,%r9,%rax 2005 2006 2007 addq %rcx,%r10 2008 adcq %rax,%r11 2009 2010 mulxq %r9,%rcx,%r9 2011 adcq %rcx,%r8 2012 shlxq %rsi,%r10,%rcx 2013 adcq $0,%r9 2014 shrxq %rsi,%r10,%rax 2015 2016 2017 addq %rcx,%r11 2018 adcq %rax,%r8 2019 2020 mulxq %r10,%rcx,%r10 2021 adcq %rcx,%r9 2022 shlxq %rsi,%r11,%rcx 2023 adcq $0,%r10 2024 shrxq %rsi,%r11,%rax 2025 2026 2027 addq %rcx,%r8 2028 adcq %rax,%r9 2029 2030 mulxq %r11,%rcx,%r11 2031 adcq %rcx,%r10 2032 adcq $0,%r11 2033 2034 xorq %rdx,%rdx 2035 addq %r8,%r12 2036 movq .Lpoly+8(%rip),%rsi 2037 adcq %r9,%r13 2038 movq %r12,%r8 2039 adcq %r10,%r14 2040 adcq %r11,%r15 2041 movq %r13,%r9 2042 adcq $0,%rdx 2043 2044 subq $-1,%r12 2045 movq %r14,%r10 2046 sbbq %rsi,%r13 2047 sbbq $0,%r14 2048 movq %r15,%r11 2049 sbbq %rbp,%r15 2050 sbbq $0,%rdx 2051 2052 cmovcq %r8,%r12 2053 cmovcq %r9,%r13 2054 movq %r12,0(%rdi) 2055 cmovcq %r10,%r14 2056 movq %r13,8(%rdi) 2057 cmovcq %r11,%r15 2058 movq %r14,16(%rdi) 2059 movq %r15,24(%rdi) 2060 2061 .byte 0xf3,0xc3 2062.cfi_endproc 2063.size __ecp_nistz256_sqr_montx,.-__ecp_nistz256_sqr_montx 2064 2065 2066.globl GFp_nistz256_select_w5 2067.hidden GFp_nistz256_select_w5 2068.type GFp_nistz256_select_w5,@function 2069.align 32 2070GFp_nistz256_select_w5: 2071.cfi_startproc 2072 leaq GFp_ia32cap_P(%rip),%rax 2073 movq 8(%rax),%rax 2074 testl $32,%eax 2075 jnz .Lavx2_select_w5 2076 movdqa .LOne(%rip),%xmm0 2077 movd %edx,%xmm1 2078 2079 pxor %xmm2,%xmm2 2080 pxor %xmm3,%xmm3 2081 pxor %xmm4,%xmm4 2082 pxor %xmm5,%xmm5 2083 pxor %xmm6,%xmm6 2084 pxor %xmm7,%xmm7 2085 2086 movdqa %xmm0,%xmm8 2087 pshufd $0,%xmm1,%xmm1 2088 2089 movq $16,%rax 2090.Lselect_loop_sse_w5: 2091 2092 movdqa %xmm8,%xmm15 2093 paddd %xmm0,%xmm8 2094 pcmpeqd %xmm1,%xmm15 2095 2096 movdqa 0(%rsi),%xmm9 2097 movdqa 16(%rsi),%xmm10 2098 movdqa 32(%rsi),%xmm11 2099 movdqa 48(%rsi),%xmm12 2100 movdqa 64(%rsi),%xmm13 2101 movdqa 80(%rsi),%xmm14 2102 leaq 96(%rsi),%rsi 2103 2104 pand %xmm15,%xmm9 2105 pand %xmm15,%xmm10 2106 por %xmm9,%xmm2 2107 pand %xmm15,%xmm11 2108 por %xmm10,%xmm3 2109 pand %xmm15,%xmm12 2110 por %xmm11,%xmm4 2111 pand %xmm15,%xmm13 2112 por %xmm12,%xmm5 2113 pand %xmm15,%xmm14 2114 por %xmm13,%xmm6 2115 por %xmm14,%xmm7 2116 2117 decq %rax 2118 jnz .Lselect_loop_sse_w5 2119 2120 movdqu %xmm2,0(%rdi) 2121 movdqu %xmm3,16(%rdi) 2122 movdqu %xmm4,32(%rdi) 2123 movdqu %xmm5,48(%rdi) 2124 movdqu %xmm6,64(%rdi) 2125 movdqu %xmm7,80(%rdi) 2126 .byte 0xf3,0xc3 2127.cfi_endproc 2128.LSEH_end_GFp_nistz256_select_w5: 2129.size GFp_nistz256_select_w5,.-GFp_nistz256_select_w5 2130 2131 2132 2133.globl GFp_nistz256_select_w7 2134.hidden GFp_nistz256_select_w7 2135.type GFp_nistz256_select_w7,@function 2136.align 32 2137GFp_nistz256_select_w7: 2138.cfi_startproc 2139 leaq GFp_ia32cap_P(%rip),%rax 2140 movq 8(%rax),%rax 2141 testl $32,%eax 2142 jnz .Lavx2_select_w7 2143 movdqa .LOne(%rip),%xmm8 2144 movd %edx,%xmm1 2145 2146 pxor %xmm2,%xmm2 2147 pxor %xmm3,%xmm3 2148 pxor %xmm4,%xmm4 2149 pxor %xmm5,%xmm5 2150 2151 movdqa %xmm8,%xmm0 2152 pshufd $0,%xmm1,%xmm1 2153 movq $64,%rax 2154 2155.Lselect_loop_sse_w7: 2156 movdqa %xmm8,%xmm15 2157 paddd %xmm0,%xmm8 2158 movdqa 0(%rsi),%xmm9 2159 movdqa 16(%rsi),%xmm10 2160 pcmpeqd %xmm1,%xmm15 2161 movdqa 32(%rsi),%xmm11 2162 movdqa 48(%rsi),%xmm12 2163 leaq 64(%rsi),%rsi 2164 2165 pand %xmm15,%xmm9 2166 pand %xmm15,%xmm10 2167 por %xmm9,%xmm2 2168 pand %xmm15,%xmm11 2169 por %xmm10,%xmm3 2170 pand %xmm15,%xmm12 2171 por %xmm11,%xmm4 2172 prefetcht0 255(%rsi) 2173 por %xmm12,%xmm5 2174 2175 decq %rax 2176 jnz .Lselect_loop_sse_w7 2177 2178 movdqu %xmm2,0(%rdi) 2179 movdqu %xmm3,16(%rdi) 2180 movdqu %xmm4,32(%rdi) 2181 movdqu %xmm5,48(%rdi) 2182 .byte 0xf3,0xc3 2183.cfi_endproc 2184.LSEH_end_GFp_nistz256_select_w7: 2185.size GFp_nistz256_select_w7,.-GFp_nistz256_select_w7 2186 2187 2188.type GFp_nistz256_avx2_select_w5,@function 2189.align 32 2190GFp_nistz256_avx2_select_w5: 2191.cfi_startproc 2192.Lavx2_select_w5: 2193 vzeroupper 2194 vmovdqa .LTwo(%rip),%ymm0 2195 2196 vpxor %ymm2,%ymm2,%ymm2 2197 vpxor %ymm3,%ymm3,%ymm3 2198 vpxor %ymm4,%ymm4,%ymm4 2199 2200 vmovdqa .LOne(%rip),%ymm5 2201 vmovdqa .LTwo(%rip),%ymm10 2202 2203 vmovd %edx,%xmm1 2204 vpermd %ymm1,%ymm2,%ymm1 2205 2206 movq $8,%rax 2207.Lselect_loop_avx2_w5: 2208 2209 vmovdqa 0(%rsi),%ymm6 2210 vmovdqa 32(%rsi),%ymm7 2211 vmovdqa 64(%rsi),%ymm8 2212 2213 vmovdqa 96(%rsi),%ymm11 2214 vmovdqa 128(%rsi),%ymm12 2215 vmovdqa 160(%rsi),%ymm13 2216 2217 vpcmpeqd %ymm1,%ymm5,%ymm9 2218 vpcmpeqd %ymm1,%ymm10,%ymm14 2219 2220 vpaddd %ymm0,%ymm5,%ymm5 2221 vpaddd %ymm0,%ymm10,%ymm10 2222 leaq 192(%rsi),%rsi 2223 2224 vpand %ymm9,%ymm6,%ymm6 2225 vpand %ymm9,%ymm7,%ymm7 2226 vpand %ymm9,%ymm8,%ymm8 2227 vpand %ymm14,%ymm11,%ymm11 2228 vpand %ymm14,%ymm12,%ymm12 2229 vpand %ymm14,%ymm13,%ymm13 2230 2231 vpxor %ymm6,%ymm2,%ymm2 2232 vpxor %ymm7,%ymm3,%ymm3 2233 vpxor %ymm8,%ymm4,%ymm4 2234 vpxor %ymm11,%ymm2,%ymm2 2235 vpxor %ymm12,%ymm3,%ymm3 2236 vpxor %ymm13,%ymm4,%ymm4 2237 2238 decq %rax 2239 jnz .Lselect_loop_avx2_w5 2240 2241 vmovdqu %ymm2,0(%rdi) 2242 vmovdqu %ymm3,32(%rdi) 2243 vmovdqu %ymm4,64(%rdi) 2244 vzeroupper 2245 .byte 0xf3,0xc3 2246.cfi_endproc 2247.LSEH_end_GFp_nistz256_avx2_select_w5: 2248.size GFp_nistz256_avx2_select_w5,.-GFp_nistz256_avx2_select_w5 2249 2250 2251 2252.globl GFp_nistz256_avx2_select_w7 2253.hidden GFp_nistz256_avx2_select_w7 2254.type GFp_nistz256_avx2_select_w7,@function 2255.align 32 2256GFp_nistz256_avx2_select_w7: 2257.cfi_startproc 2258.Lavx2_select_w7: 2259 vzeroupper 2260 vmovdqa .LThree(%rip),%ymm0 2261 2262 vpxor %ymm2,%ymm2,%ymm2 2263 vpxor %ymm3,%ymm3,%ymm3 2264 2265 vmovdqa .LOne(%rip),%ymm4 2266 vmovdqa .LTwo(%rip),%ymm8 2267 vmovdqa .LThree(%rip),%ymm12 2268 2269 vmovd %edx,%xmm1 2270 vpermd %ymm1,%ymm2,%ymm1 2271 2272 2273 movq $21,%rax 2274.Lselect_loop_avx2_w7: 2275 2276 vmovdqa 0(%rsi),%ymm5 2277 vmovdqa 32(%rsi),%ymm6 2278 2279 vmovdqa 64(%rsi),%ymm9 2280 vmovdqa 96(%rsi),%ymm10 2281 2282 vmovdqa 128(%rsi),%ymm13 2283 vmovdqa 160(%rsi),%ymm14 2284 2285 vpcmpeqd %ymm1,%ymm4,%ymm7 2286 vpcmpeqd %ymm1,%ymm8,%ymm11 2287 vpcmpeqd %ymm1,%ymm12,%ymm15 2288 2289 vpaddd %ymm0,%ymm4,%ymm4 2290 vpaddd %ymm0,%ymm8,%ymm8 2291 vpaddd %ymm0,%ymm12,%ymm12 2292 leaq 192(%rsi),%rsi 2293 2294 vpand %ymm7,%ymm5,%ymm5 2295 vpand %ymm7,%ymm6,%ymm6 2296 vpand %ymm11,%ymm9,%ymm9 2297 vpand %ymm11,%ymm10,%ymm10 2298 vpand %ymm15,%ymm13,%ymm13 2299 vpand %ymm15,%ymm14,%ymm14 2300 2301 vpxor %ymm5,%ymm2,%ymm2 2302 vpxor %ymm6,%ymm3,%ymm3 2303 vpxor %ymm9,%ymm2,%ymm2 2304 vpxor %ymm10,%ymm3,%ymm3 2305 vpxor %ymm13,%ymm2,%ymm2 2306 vpxor %ymm14,%ymm3,%ymm3 2307 2308 decq %rax 2309 jnz .Lselect_loop_avx2_w7 2310 2311 2312 vmovdqa 0(%rsi),%ymm5 2313 vmovdqa 32(%rsi),%ymm6 2314 2315 vpcmpeqd %ymm1,%ymm4,%ymm7 2316 2317 vpand %ymm7,%ymm5,%ymm5 2318 vpand %ymm7,%ymm6,%ymm6 2319 2320 vpxor %ymm5,%ymm2,%ymm2 2321 vpxor %ymm6,%ymm3,%ymm3 2322 2323 vmovdqu %ymm2,0(%rdi) 2324 vmovdqu %ymm3,32(%rdi) 2325 vzeroupper 2326 .byte 0xf3,0xc3 2327.cfi_endproc 2328.LSEH_end_GFp_nistz256_avx2_select_w7: 2329.size GFp_nistz256_avx2_select_w7,.-GFp_nistz256_avx2_select_w7 2330.type __ecp_nistz256_add_toq,@function 2331.align 32 2332__ecp_nistz256_add_toq: 2333.cfi_startproc 2334 xorq %r11,%r11 2335 addq 0(%rbx),%r12 2336 adcq 8(%rbx),%r13 2337 movq %r12,%rax 2338 adcq 16(%rbx),%r8 2339 adcq 24(%rbx),%r9 2340 movq %r13,%rbp 2341 adcq $0,%r11 2342 2343 subq $-1,%r12 2344 movq %r8,%rcx 2345 sbbq %r14,%r13 2346 sbbq $0,%r8 2347 movq %r9,%r10 2348 sbbq %r15,%r9 2349 sbbq $0,%r11 2350 2351 cmovcq %rax,%r12 2352 cmovcq %rbp,%r13 2353 movq %r12,0(%rdi) 2354 cmovcq %rcx,%r8 2355 movq %r13,8(%rdi) 2356 cmovcq %r10,%r9 2357 movq %r8,16(%rdi) 2358 movq %r9,24(%rdi) 2359 2360 .byte 0xf3,0xc3 2361.cfi_endproc 2362.size __ecp_nistz256_add_toq,.-__ecp_nistz256_add_toq 2363 2364.type __ecp_nistz256_sub_fromq,@function 2365.align 32 2366__ecp_nistz256_sub_fromq: 2367.cfi_startproc 2368 subq 0(%rbx),%r12 2369 sbbq 8(%rbx),%r13 2370 movq %r12,%rax 2371 sbbq 16(%rbx),%r8 2372 sbbq 24(%rbx),%r9 2373 movq %r13,%rbp 2374 sbbq %r11,%r11 2375 2376 addq $-1,%r12 2377 movq %r8,%rcx 2378 adcq %r14,%r13 2379 adcq $0,%r8 2380 movq %r9,%r10 2381 adcq %r15,%r9 2382 testq %r11,%r11 2383 2384 cmovzq %rax,%r12 2385 cmovzq %rbp,%r13 2386 movq %r12,0(%rdi) 2387 cmovzq %rcx,%r8 2388 movq %r13,8(%rdi) 2389 cmovzq %r10,%r9 2390 movq %r8,16(%rdi) 2391 movq %r9,24(%rdi) 2392 2393 .byte 0xf3,0xc3 2394.cfi_endproc 2395.size __ecp_nistz256_sub_fromq,.-__ecp_nistz256_sub_fromq 2396 2397.type __ecp_nistz256_subq,@function 2398.align 32 2399__ecp_nistz256_subq: 2400.cfi_startproc 2401 subq %r12,%rax 2402 sbbq %r13,%rbp 2403 movq %rax,%r12 2404 sbbq %r8,%rcx 2405 sbbq %r9,%r10 2406 movq %rbp,%r13 2407 sbbq %r11,%r11 2408 2409 addq $-1,%rax 2410 movq %rcx,%r8 2411 adcq %r14,%rbp 2412 adcq $0,%rcx 2413 movq %r10,%r9 2414 adcq %r15,%r10 2415 testq %r11,%r11 2416 2417 cmovnzq %rax,%r12 2418 cmovnzq %rbp,%r13 2419 cmovnzq %rcx,%r8 2420 cmovnzq %r10,%r9 2421 2422 .byte 0xf3,0xc3 2423.cfi_endproc 2424.size __ecp_nistz256_subq,.-__ecp_nistz256_subq 2425 2426.type __ecp_nistz256_mul_by_2q,@function 2427.align 32 2428__ecp_nistz256_mul_by_2q: 2429.cfi_startproc 2430 xorq %r11,%r11 2431 addq %r12,%r12 2432 adcq %r13,%r13 2433 movq %r12,%rax 2434 adcq %r8,%r8 2435 adcq %r9,%r9 2436 movq %r13,%rbp 2437 adcq $0,%r11 2438 2439 subq $-1,%r12 2440 movq %r8,%rcx 2441 sbbq %r14,%r13 2442 sbbq $0,%r8 2443 movq %r9,%r10 2444 sbbq %r15,%r9 2445 sbbq $0,%r11 2446 2447 cmovcq %rax,%r12 2448 cmovcq %rbp,%r13 2449 movq %r12,0(%rdi) 2450 cmovcq %rcx,%r8 2451 movq %r13,8(%rdi) 2452 cmovcq %r10,%r9 2453 movq %r8,16(%rdi) 2454 movq %r9,24(%rdi) 2455 2456 .byte 0xf3,0xc3 2457.cfi_endproc 2458.size __ecp_nistz256_mul_by_2q,.-__ecp_nistz256_mul_by_2q 2459.globl GFp_nistz256_point_double 2460.hidden GFp_nistz256_point_double 2461.type GFp_nistz256_point_double,@function 2462.align 32 2463GFp_nistz256_point_double: 2464.cfi_startproc 2465 leaq GFp_ia32cap_P(%rip),%rcx 2466 movq 8(%rcx),%rcx 2467 andl $0x80100,%ecx 2468 cmpl $0x80100,%ecx 2469 je .Lpoint_doublex 2470 pushq %rbp 2471.cfi_adjust_cfa_offset 8 2472.cfi_offset %rbp,-16 2473 pushq %rbx 2474.cfi_adjust_cfa_offset 8 2475.cfi_offset %rbx,-24 2476 pushq %r12 2477.cfi_adjust_cfa_offset 8 2478.cfi_offset %r12,-32 2479 pushq %r13 2480.cfi_adjust_cfa_offset 8 2481.cfi_offset %r13,-40 2482 pushq %r14 2483.cfi_adjust_cfa_offset 8 2484.cfi_offset %r14,-48 2485 pushq %r15 2486.cfi_adjust_cfa_offset 8 2487.cfi_offset %r15,-56 2488 subq $160+8,%rsp 2489.cfi_adjust_cfa_offset 32*5+8 2490.Lpoint_doubleq_body: 2491 2492.Lpoint_double_shortcutq: 2493 movdqu 0(%rsi),%xmm0 2494 movq %rsi,%rbx 2495 movdqu 16(%rsi),%xmm1 2496 movq 32+0(%rsi),%r12 2497 movq 32+8(%rsi),%r13 2498 movq 32+16(%rsi),%r8 2499 movq 32+24(%rsi),%r9 2500 movq .Lpoly+8(%rip),%r14 2501 movq .Lpoly+24(%rip),%r15 2502 movdqa %xmm0,96(%rsp) 2503 movdqa %xmm1,96+16(%rsp) 2504 leaq 32(%rdi),%r10 2505 leaq 64(%rdi),%r11 2506.byte 102,72,15,110,199 2507.byte 102,73,15,110,202 2508.byte 102,73,15,110,211 2509 2510 leaq 0(%rsp),%rdi 2511 call __ecp_nistz256_mul_by_2q 2512 2513 movq 64+0(%rsi),%rax 2514 movq 64+8(%rsi),%r14 2515 movq 64+16(%rsi),%r15 2516 movq 64+24(%rsi),%r8 2517 leaq 64-0(%rsi),%rsi 2518 leaq 64(%rsp),%rdi 2519 call __ecp_nistz256_sqr_montq 2520 2521 movq 0+0(%rsp),%rax 2522 movq 8+0(%rsp),%r14 2523 leaq 0+0(%rsp),%rsi 2524 movq 16+0(%rsp),%r15 2525 movq 24+0(%rsp),%r8 2526 leaq 0(%rsp),%rdi 2527 call __ecp_nistz256_sqr_montq 2528 2529 movq 32(%rbx),%rax 2530 movq 64+0(%rbx),%r9 2531 movq 64+8(%rbx),%r10 2532 movq 64+16(%rbx),%r11 2533 movq 64+24(%rbx),%r12 2534 leaq 64-0(%rbx),%rsi 2535 leaq 32(%rbx),%rbx 2536.byte 102,72,15,126,215 2537 call __ecp_nistz256_mul_montq 2538 call __ecp_nistz256_mul_by_2q 2539 2540 movq 96+0(%rsp),%r12 2541 movq 96+8(%rsp),%r13 2542 leaq 64(%rsp),%rbx 2543 movq 96+16(%rsp),%r8 2544 movq 96+24(%rsp),%r9 2545 leaq 32(%rsp),%rdi 2546 call __ecp_nistz256_add_toq 2547 2548 movq 96+0(%rsp),%r12 2549 movq 96+8(%rsp),%r13 2550 leaq 64(%rsp),%rbx 2551 movq 96+16(%rsp),%r8 2552 movq 96+24(%rsp),%r9 2553 leaq 64(%rsp),%rdi 2554 call __ecp_nistz256_sub_fromq 2555 2556 movq 0+0(%rsp),%rax 2557 movq 8+0(%rsp),%r14 2558 leaq 0+0(%rsp),%rsi 2559 movq 16+0(%rsp),%r15 2560 movq 24+0(%rsp),%r8 2561.byte 102,72,15,126,207 2562 call __ecp_nistz256_sqr_montq 2563 xorq %r9,%r9 2564 movq %r12,%rax 2565 addq $-1,%r12 2566 movq %r13,%r10 2567 adcq %rsi,%r13 2568 movq %r14,%rcx 2569 adcq $0,%r14 2570 movq %r15,%r8 2571 adcq %rbp,%r15 2572 adcq $0,%r9 2573 xorq %rsi,%rsi 2574 testq $1,%rax 2575 2576 cmovzq %rax,%r12 2577 cmovzq %r10,%r13 2578 cmovzq %rcx,%r14 2579 cmovzq %r8,%r15 2580 cmovzq %rsi,%r9 2581 2582 movq %r13,%rax 2583 shrq $1,%r12 2584 shlq $63,%rax 2585 movq %r14,%r10 2586 shrq $1,%r13 2587 orq %rax,%r12 2588 shlq $63,%r10 2589 movq %r15,%rcx 2590 shrq $1,%r14 2591 orq %r10,%r13 2592 shlq $63,%rcx 2593 movq %r12,0(%rdi) 2594 shrq $1,%r15 2595 movq %r13,8(%rdi) 2596 shlq $63,%r9 2597 orq %rcx,%r14 2598 orq %r9,%r15 2599 movq %r14,16(%rdi) 2600 movq %r15,24(%rdi) 2601 movq 64(%rsp),%rax 2602 leaq 64(%rsp),%rbx 2603 movq 0+32(%rsp),%r9 2604 movq 8+32(%rsp),%r10 2605 leaq 0+32(%rsp),%rsi 2606 movq 16+32(%rsp),%r11 2607 movq 24+32(%rsp),%r12 2608 leaq 32(%rsp),%rdi 2609 call __ecp_nistz256_mul_montq 2610 2611 leaq 128(%rsp),%rdi 2612 call __ecp_nistz256_mul_by_2q 2613 2614 leaq 32(%rsp),%rbx 2615 leaq 32(%rsp),%rdi 2616 call __ecp_nistz256_add_toq 2617 2618 movq 96(%rsp),%rax 2619 leaq 96(%rsp),%rbx 2620 movq 0+0(%rsp),%r9 2621 movq 8+0(%rsp),%r10 2622 leaq 0+0(%rsp),%rsi 2623 movq 16+0(%rsp),%r11 2624 movq 24+0(%rsp),%r12 2625 leaq 0(%rsp),%rdi 2626 call __ecp_nistz256_mul_montq 2627 2628 leaq 128(%rsp),%rdi 2629 call __ecp_nistz256_mul_by_2q 2630 2631 movq 0+32(%rsp),%rax 2632 movq 8+32(%rsp),%r14 2633 leaq 0+32(%rsp),%rsi 2634 movq 16+32(%rsp),%r15 2635 movq 24+32(%rsp),%r8 2636.byte 102,72,15,126,199 2637 call __ecp_nistz256_sqr_montq 2638 2639 leaq 128(%rsp),%rbx 2640 movq %r14,%r8 2641 movq %r15,%r9 2642 movq %rsi,%r14 2643 movq %rbp,%r15 2644 call __ecp_nistz256_sub_fromq 2645 2646 movq 0+0(%rsp),%rax 2647 movq 0+8(%rsp),%rbp 2648 movq 0+16(%rsp),%rcx 2649 movq 0+24(%rsp),%r10 2650 leaq 0(%rsp),%rdi 2651 call __ecp_nistz256_subq 2652 2653 movq 32(%rsp),%rax 2654 leaq 32(%rsp),%rbx 2655 movq %r12,%r14 2656 xorl %ecx,%ecx 2657 movq %r12,0+0(%rsp) 2658 movq %r13,%r10 2659 movq %r13,0+8(%rsp) 2660 cmovzq %r8,%r11 2661 movq %r8,0+16(%rsp) 2662 leaq 0-0(%rsp),%rsi 2663 cmovzq %r9,%r12 2664 movq %r9,0+24(%rsp) 2665 movq %r14,%r9 2666 leaq 0(%rsp),%rdi 2667 call __ecp_nistz256_mul_montq 2668 2669.byte 102,72,15,126,203 2670.byte 102,72,15,126,207 2671 call __ecp_nistz256_sub_fromq 2672 2673 leaq 160+56(%rsp),%rsi 2674.cfi_def_cfa %rsi,8 2675 movq -48(%rsi),%r15 2676.cfi_restore %r15 2677 movq -40(%rsi),%r14 2678.cfi_restore %r14 2679 movq -32(%rsi),%r13 2680.cfi_restore %r13 2681 movq -24(%rsi),%r12 2682.cfi_restore %r12 2683 movq -16(%rsi),%rbx 2684.cfi_restore %rbx 2685 movq -8(%rsi),%rbp 2686.cfi_restore %rbp 2687 leaq (%rsi),%rsp 2688.cfi_def_cfa_register %rsp 2689.Lpoint_doubleq_epilogue: 2690 .byte 0xf3,0xc3 2691.cfi_endproc 2692.size GFp_nistz256_point_double,.-GFp_nistz256_point_double 2693.globl GFp_nistz256_point_add 2694.hidden GFp_nistz256_point_add 2695.type GFp_nistz256_point_add,@function 2696.align 32 2697GFp_nistz256_point_add: 2698.cfi_startproc 2699 leaq GFp_ia32cap_P(%rip),%rcx 2700 movq 8(%rcx),%rcx 2701 andl $0x80100,%ecx 2702 cmpl $0x80100,%ecx 2703 je .Lpoint_addx 2704 pushq %rbp 2705.cfi_adjust_cfa_offset 8 2706.cfi_offset %rbp,-16 2707 pushq %rbx 2708.cfi_adjust_cfa_offset 8 2709.cfi_offset %rbx,-24 2710 pushq %r12 2711.cfi_adjust_cfa_offset 8 2712.cfi_offset %r12,-32 2713 pushq %r13 2714.cfi_adjust_cfa_offset 8 2715.cfi_offset %r13,-40 2716 pushq %r14 2717.cfi_adjust_cfa_offset 8 2718.cfi_offset %r14,-48 2719 pushq %r15 2720.cfi_adjust_cfa_offset 8 2721.cfi_offset %r15,-56 2722 subq $576+8,%rsp 2723.cfi_adjust_cfa_offset 32*18+8 2724.Lpoint_addq_body: 2725 2726 movdqu 0(%rsi),%xmm0 2727 movdqu 16(%rsi),%xmm1 2728 movdqu 32(%rsi),%xmm2 2729 movdqu 48(%rsi),%xmm3 2730 movdqu 64(%rsi),%xmm4 2731 movdqu 80(%rsi),%xmm5 2732 movq %rsi,%rbx 2733 movq %rdx,%rsi 2734 movdqa %xmm0,384(%rsp) 2735 movdqa %xmm1,384+16(%rsp) 2736 movdqa %xmm2,416(%rsp) 2737 movdqa %xmm3,416+16(%rsp) 2738 movdqa %xmm4,448(%rsp) 2739 movdqa %xmm5,448+16(%rsp) 2740 por %xmm4,%xmm5 2741 2742 movdqu 0(%rsi),%xmm0 2743 pshufd $0xb1,%xmm5,%xmm3 2744 movdqu 16(%rsi),%xmm1 2745 movdqu 32(%rsi),%xmm2 2746 por %xmm3,%xmm5 2747 movdqu 48(%rsi),%xmm3 2748 movq 64+0(%rsi),%rax 2749 movq 64+8(%rsi),%r14 2750 movq 64+16(%rsi),%r15 2751 movq 64+24(%rsi),%r8 2752 movdqa %xmm0,480(%rsp) 2753 pshufd $0x1e,%xmm5,%xmm4 2754 movdqa %xmm1,480+16(%rsp) 2755 movdqu 64(%rsi),%xmm0 2756 movdqu 80(%rsi),%xmm1 2757 movdqa %xmm2,512(%rsp) 2758 movdqa %xmm3,512+16(%rsp) 2759 por %xmm4,%xmm5 2760 pxor %xmm4,%xmm4 2761 por %xmm0,%xmm1 2762.byte 102,72,15,110,199 2763 2764 leaq 64-0(%rsi),%rsi 2765 movq %rax,544+0(%rsp) 2766 movq %r14,544+8(%rsp) 2767 movq %r15,544+16(%rsp) 2768 movq %r8,544+24(%rsp) 2769 leaq 96(%rsp),%rdi 2770 call __ecp_nistz256_sqr_montq 2771 2772 pcmpeqd %xmm4,%xmm5 2773 pshufd $0xb1,%xmm1,%xmm4 2774 por %xmm1,%xmm4 2775 pshufd $0,%xmm5,%xmm5 2776 pshufd $0x1e,%xmm4,%xmm3 2777 por %xmm3,%xmm4 2778 pxor %xmm3,%xmm3 2779 pcmpeqd %xmm3,%xmm4 2780 pshufd $0,%xmm4,%xmm4 2781 movq 64+0(%rbx),%rax 2782 movq 64+8(%rbx),%r14 2783 movq 64+16(%rbx),%r15 2784 movq 64+24(%rbx),%r8 2785.byte 102,72,15,110,203 2786 2787 leaq 64-0(%rbx),%rsi 2788 leaq 32(%rsp),%rdi 2789 call __ecp_nistz256_sqr_montq 2790 2791 movq 544(%rsp),%rax 2792 leaq 544(%rsp),%rbx 2793 movq 0+96(%rsp),%r9 2794 movq 8+96(%rsp),%r10 2795 leaq 0+96(%rsp),%rsi 2796 movq 16+96(%rsp),%r11 2797 movq 24+96(%rsp),%r12 2798 leaq 224(%rsp),%rdi 2799 call __ecp_nistz256_mul_montq 2800 2801 movq 448(%rsp),%rax 2802 leaq 448(%rsp),%rbx 2803 movq 0+32(%rsp),%r9 2804 movq 8+32(%rsp),%r10 2805 leaq 0+32(%rsp),%rsi 2806 movq 16+32(%rsp),%r11 2807 movq 24+32(%rsp),%r12 2808 leaq 256(%rsp),%rdi 2809 call __ecp_nistz256_mul_montq 2810 2811 movq 416(%rsp),%rax 2812 leaq 416(%rsp),%rbx 2813 movq 0+224(%rsp),%r9 2814 movq 8+224(%rsp),%r10 2815 leaq 0+224(%rsp),%rsi 2816 movq 16+224(%rsp),%r11 2817 movq 24+224(%rsp),%r12 2818 leaq 224(%rsp),%rdi 2819 call __ecp_nistz256_mul_montq 2820 2821 movq 512(%rsp),%rax 2822 leaq 512(%rsp),%rbx 2823 movq 0+256(%rsp),%r9 2824 movq 8+256(%rsp),%r10 2825 leaq 0+256(%rsp),%rsi 2826 movq 16+256(%rsp),%r11 2827 movq 24+256(%rsp),%r12 2828 leaq 256(%rsp),%rdi 2829 call __ecp_nistz256_mul_montq 2830 2831 leaq 224(%rsp),%rbx 2832 leaq 64(%rsp),%rdi 2833 call __ecp_nistz256_sub_fromq 2834 2835 orq %r13,%r12 2836 movdqa %xmm4,%xmm2 2837 orq %r8,%r12 2838 orq %r9,%r12 2839 por %xmm5,%xmm2 2840.byte 102,73,15,110,220 2841 2842 movq 384(%rsp),%rax 2843 leaq 384(%rsp),%rbx 2844 movq 0+96(%rsp),%r9 2845 movq 8+96(%rsp),%r10 2846 leaq 0+96(%rsp),%rsi 2847 movq 16+96(%rsp),%r11 2848 movq 24+96(%rsp),%r12 2849 leaq 160(%rsp),%rdi 2850 call __ecp_nistz256_mul_montq 2851 2852 movq 480(%rsp),%rax 2853 leaq 480(%rsp),%rbx 2854 movq 0+32(%rsp),%r9 2855 movq 8+32(%rsp),%r10 2856 leaq 0+32(%rsp),%rsi 2857 movq 16+32(%rsp),%r11 2858 movq 24+32(%rsp),%r12 2859 leaq 192(%rsp),%rdi 2860 call __ecp_nistz256_mul_montq 2861 2862 leaq 160(%rsp),%rbx 2863 leaq 0(%rsp),%rdi 2864 call __ecp_nistz256_sub_fromq 2865 2866 orq %r13,%r12 2867 orq %r8,%r12 2868 orq %r9,%r12 2869 2870.byte 102,73,15,126,208 2871.byte 102,73,15,126,217 2872 orq %r8,%r12 2873.byte 0x3e 2874 jnz .Ladd_proceedq 2875 2876 2877 2878 testq %r9,%r9 2879 jz .Ladd_doubleq 2880 2881 2882 2883 2884 2885 2886.byte 102,72,15,126,199 2887 pxor %xmm0,%xmm0 2888 movdqu %xmm0,0(%rdi) 2889 movdqu %xmm0,16(%rdi) 2890 movdqu %xmm0,32(%rdi) 2891 movdqu %xmm0,48(%rdi) 2892 movdqu %xmm0,64(%rdi) 2893 movdqu %xmm0,80(%rdi) 2894 jmp .Ladd_doneq 2895 2896.align 32 2897.Ladd_doubleq: 2898.byte 102,72,15,126,206 2899.byte 102,72,15,126,199 2900 addq $416,%rsp 2901.cfi_adjust_cfa_offset -416 2902 jmp .Lpoint_double_shortcutq 2903.cfi_adjust_cfa_offset 416 2904 2905.align 32 2906.Ladd_proceedq: 2907 movq 0+64(%rsp),%rax 2908 movq 8+64(%rsp),%r14 2909 leaq 0+64(%rsp),%rsi 2910 movq 16+64(%rsp),%r15 2911 movq 24+64(%rsp),%r8 2912 leaq 96(%rsp),%rdi 2913 call __ecp_nistz256_sqr_montq 2914 2915 movq 448(%rsp),%rax 2916 leaq 448(%rsp),%rbx 2917 movq 0+0(%rsp),%r9 2918 movq 8+0(%rsp),%r10 2919 leaq 0+0(%rsp),%rsi 2920 movq 16+0(%rsp),%r11 2921 movq 24+0(%rsp),%r12 2922 leaq 352(%rsp),%rdi 2923 call __ecp_nistz256_mul_montq 2924 2925 movq 0+0(%rsp),%rax 2926 movq 8+0(%rsp),%r14 2927 leaq 0+0(%rsp),%rsi 2928 movq 16+0(%rsp),%r15 2929 movq 24+0(%rsp),%r8 2930 leaq 32(%rsp),%rdi 2931 call __ecp_nistz256_sqr_montq 2932 2933 movq 544(%rsp),%rax 2934 leaq 544(%rsp),%rbx 2935 movq 0+352(%rsp),%r9 2936 movq 8+352(%rsp),%r10 2937 leaq 0+352(%rsp),%rsi 2938 movq 16+352(%rsp),%r11 2939 movq 24+352(%rsp),%r12 2940 leaq 352(%rsp),%rdi 2941 call __ecp_nistz256_mul_montq 2942 2943 movq 0(%rsp),%rax 2944 leaq 0(%rsp),%rbx 2945 movq 0+32(%rsp),%r9 2946 movq 8+32(%rsp),%r10 2947 leaq 0+32(%rsp),%rsi 2948 movq 16+32(%rsp),%r11 2949 movq 24+32(%rsp),%r12 2950 leaq 128(%rsp),%rdi 2951 call __ecp_nistz256_mul_montq 2952 2953 movq 160(%rsp),%rax 2954 leaq 160(%rsp),%rbx 2955 movq 0+32(%rsp),%r9 2956 movq 8+32(%rsp),%r10 2957 leaq 0+32(%rsp),%rsi 2958 movq 16+32(%rsp),%r11 2959 movq 24+32(%rsp),%r12 2960 leaq 192(%rsp),%rdi 2961 call __ecp_nistz256_mul_montq 2962 2963 2964 2965 2966 xorq %r11,%r11 2967 addq %r12,%r12 2968 leaq 96(%rsp),%rsi 2969 adcq %r13,%r13 2970 movq %r12,%rax 2971 adcq %r8,%r8 2972 adcq %r9,%r9 2973 movq %r13,%rbp 2974 adcq $0,%r11 2975 2976 subq $-1,%r12 2977 movq %r8,%rcx 2978 sbbq %r14,%r13 2979 sbbq $0,%r8 2980 movq %r9,%r10 2981 sbbq %r15,%r9 2982 sbbq $0,%r11 2983 2984 cmovcq %rax,%r12 2985 movq 0(%rsi),%rax 2986 cmovcq %rbp,%r13 2987 movq 8(%rsi),%rbp 2988 cmovcq %rcx,%r8 2989 movq 16(%rsi),%rcx 2990 cmovcq %r10,%r9 2991 movq 24(%rsi),%r10 2992 2993 call __ecp_nistz256_subq 2994 2995 leaq 128(%rsp),%rbx 2996 leaq 288(%rsp),%rdi 2997 call __ecp_nistz256_sub_fromq 2998 2999 movq 192+0(%rsp),%rax 3000 movq 192+8(%rsp),%rbp 3001 movq 192+16(%rsp),%rcx 3002 movq 192+24(%rsp),%r10 3003 leaq 320(%rsp),%rdi 3004 3005 call __ecp_nistz256_subq 3006 3007 movq %r12,0(%rdi) 3008 movq %r13,8(%rdi) 3009 movq %r8,16(%rdi) 3010 movq %r9,24(%rdi) 3011 movq 128(%rsp),%rax 3012 leaq 128(%rsp),%rbx 3013 movq 0+224(%rsp),%r9 3014 movq 8+224(%rsp),%r10 3015 leaq 0+224(%rsp),%rsi 3016 movq 16+224(%rsp),%r11 3017 movq 24+224(%rsp),%r12 3018 leaq 256(%rsp),%rdi 3019 call __ecp_nistz256_mul_montq 3020 3021 movq 320(%rsp),%rax 3022 leaq 320(%rsp),%rbx 3023 movq 0+64(%rsp),%r9 3024 movq 8+64(%rsp),%r10 3025 leaq 0+64(%rsp),%rsi 3026 movq 16+64(%rsp),%r11 3027 movq 24+64(%rsp),%r12 3028 leaq 320(%rsp),%rdi 3029 call __ecp_nistz256_mul_montq 3030 3031 leaq 256(%rsp),%rbx 3032 leaq 320(%rsp),%rdi 3033 call __ecp_nistz256_sub_fromq 3034 3035.byte 102,72,15,126,199 3036 3037 movdqa %xmm5,%xmm0 3038 movdqa %xmm5,%xmm1 3039 pandn 352(%rsp),%xmm0 3040 movdqa %xmm5,%xmm2 3041 pandn 352+16(%rsp),%xmm1 3042 movdqa %xmm5,%xmm3 3043 pand 544(%rsp),%xmm2 3044 pand 544+16(%rsp),%xmm3 3045 por %xmm0,%xmm2 3046 por %xmm1,%xmm3 3047 3048 movdqa %xmm4,%xmm0 3049 movdqa %xmm4,%xmm1 3050 pandn %xmm2,%xmm0 3051 movdqa %xmm4,%xmm2 3052 pandn %xmm3,%xmm1 3053 movdqa %xmm4,%xmm3 3054 pand 448(%rsp),%xmm2 3055 pand 448+16(%rsp),%xmm3 3056 por %xmm0,%xmm2 3057 por %xmm1,%xmm3 3058 movdqu %xmm2,64(%rdi) 3059 movdqu %xmm3,80(%rdi) 3060 3061 movdqa %xmm5,%xmm0 3062 movdqa %xmm5,%xmm1 3063 pandn 288(%rsp),%xmm0 3064 movdqa %xmm5,%xmm2 3065 pandn 288+16(%rsp),%xmm1 3066 movdqa %xmm5,%xmm3 3067 pand 480(%rsp),%xmm2 3068 pand 480+16(%rsp),%xmm3 3069 por %xmm0,%xmm2 3070 por %xmm1,%xmm3 3071 3072 movdqa %xmm4,%xmm0 3073 movdqa %xmm4,%xmm1 3074 pandn %xmm2,%xmm0 3075 movdqa %xmm4,%xmm2 3076 pandn %xmm3,%xmm1 3077 movdqa %xmm4,%xmm3 3078 pand 384(%rsp),%xmm2 3079 pand 384+16(%rsp),%xmm3 3080 por %xmm0,%xmm2 3081 por %xmm1,%xmm3 3082 movdqu %xmm2,0(%rdi) 3083 movdqu %xmm3,16(%rdi) 3084 3085 movdqa %xmm5,%xmm0 3086 movdqa %xmm5,%xmm1 3087 pandn 320(%rsp),%xmm0 3088 movdqa %xmm5,%xmm2 3089 pandn 320+16(%rsp),%xmm1 3090 movdqa %xmm5,%xmm3 3091 pand 512(%rsp),%xmm2 3092 pand 512+16(%rsp),%xmm3 3093 por %xmm0,%xmm2 3094 por %xmm1,%xmm3 3095 3096 movdqa %xmm4,%xmm0 3097 movdqa %xmm4,%xmm1 3098 pandn %xmm2,%xmm0 3099 movdqa %xmm4,%xmm2 3100 pandn %xmm3,%xmm1 3101 movdqa %xmm4,%xmm3 3102 pand 416(%rsp),%xmm2 3103 pand 416+16(%rsp),%xmm3 3104 por %xmm0,%xmm2 3105 por %xmm1,%xmm3 3106 movdqu %xmm2,32(%rdi) 3107 movdqu %xmm3,48(%rdi) 3108 3109.Ladd_doneq: 3110 leaq 576+56(%rsp),%rsi 3111.cfi_def_cfa %rsi,8 3112 movq -48(%rsi),%r15 3113.cfi_restore %r15 3114 movq -40(%rsi),%r14 3115.cfi_restore %r14 3116 movq -32(%rsi),%r13 3117.cfi_restore %r13 3118 movq -24(%rsi),%r12 3119.cfi_restore %r12 3120 movq -16(%rsi),%rbx 3121.cfi_restore %rbx 3122 movq -8(%rsi),%rbp 3123.cfi_restore %rbp 3124 leaq (%rsi),%rsp 3125.cfi_def_cfa_register %rsp 3126.Lpoint_addq_epilogue: 3127 .byte 0xf3,0xc3 3128.cfi_endproc 3129.size GFp_nistz256_point_add,.-GFp_nistz256_point_add 3130.globl GFp_nistz256_point_add_affine 3131.hidden GFp_nistz256_point_add_affine 3132.type GFp_nistz256_point_add_affine,@function 3133.align 32 3134GFp_nistz256_point_add_affine: 3135.cfi_startproc 3136 leaq GFp_ia32cap_P(%rip),%rcx 3137 movq 8(%rcx),%rcx 3138 andl $0x80100,%ecx 3139 cmpl $0x80100,%ecx 3140 je .Lpoint_add_affinex 3141 pushq %rbp 3142.cfi_adjust_cfa_offset 8 3143.cfi_offset %rbp,-16 3144 pushq %rbx 3145.cfi_adjust_cfa_offset 8 3146.cfi_offset %rbx,-24 3147 pushq %r12 3148.cfi_adjust_cfa_offset 8 3149.cfi_offset %r12,-32 3150 pushq %r13 3151.cfi_adjust_cfa_offset 8 3152.cfi_offset %r13,-40 3153 pushq %r14 3154.cfi_adjust_cfa_offset 8 3155.cfi_offset %r14,-48 3156 pushq %r15 3157.cfi_adjust_cfa_offset 8 3158.cfi_offset %r15,-56 3159 subq $480+8,%rsp 3160.cfi_adjust_cfa_offset 32*15+8 3161.Ladd_affineq_body: 3162 3163 movdqu 0(%rsi),%xmm0 3164 movq %rdx,%rbx 3165 movdqu 16(%rsi),%xmm1 3166 movdqu 32(%rsi),%xmm2 3167 movdqu 48(%rsi),%xmm3 3168 movdqu 64(%rsi),%xmm4 3169 movdqu 80(%rsi),%xmm5 3170 movq 64+0(%rsi),%rax 3171 movq 64+8(%rsi),%r14 3172 movq 64+16(%rsi),%r15 3173 movq 64+24(%rsi),%r8 3174 movdqa %xmm0,320(%rsp) 3175 movdqa %xmm1,320+16(%rsp) 3176 movdqa %xmm2,352(%rsp) 3177 movdqa %xmm3,352+16(%rsp) 3178 movdqa %xmm4,384(%rsp) 3179 movdqa %xmm5,384+16(%rsp) 3180 por %xmm4,%xmm5 3181 3182 movdqu 0(%rbx),%xmm0 3183 pshufd $0xb1,%xmm5,%xmm3 3184 movdqu 16(%rbx),%xmm1 3185 movdqu 32(%rbx),%xmm2 3186 por %xmm3,%xmm5 3187 movdqu 48(%rbx),%xmm3 3188 movdqa %xmm0,416(%rsp) 3189 pshufd $0x1e,%xmm5,%xmm4 3190 movdqa %xmm1,416+16(%rsp) 3191 por %xmm0,%xmm1 3192.byte 102,72,15,110,199 3193 movdqa %xmm2,448(%rsp) 3194 movdqa %xmm3,448+16(%rsp) 3195 por %xmm2,%xmm3 3196 por %xmm4,%xmm5 3197 pxor %xmm4,%xmm4 3198 por %xmm1,%xmm3 3199 3200 leaq 64-0(%rsi),%rsi 3201 leaq 32(%rsp),%rdi 3202 call __ecp_nistz256_sqr_montq 3203 3204 pcmpeqd %xmm4,%xmm5 3205 pshufd $0xb1,%xmm3,%xmm4 3206 movq 0(%rbx),%rax 3207 3208 movq %r12,%r9 3209 por %xmm3,%xmm4 3210 pshufd $0,%xmm5,%xmm5 3211 pshufd $0x1e,%xmm4,%xmm3 3212 movq %r13,%r10 3213 por %xmm3,%xmm4 3214 pxor %xmm3,%xmm3 3215 movq %r14,%r11 3216 pcmpeqd %xmm3,%xmm4 3217 pshufd $0,%xmm4,%xmm4 3218 3219 leaq 32-0(%rsp),%rsi 3220 movq %r15,%r12 3221 leaq 0(%rsp),%rdi 3222 call __ecp_nistz256_mul_montq 3223 3224 leaq 320(%rsp),%rbx 3225 leaq 64(%rsp),%rdi 3226 call __ecp_nistz256_sub_fromq 3227 3228 movq 384(%rsp),%rax 3229 leaq 384(%rsp),%rbx 3230 movq 0+32(%rsp),%r9 3231 movq 8+32(%rsp),%r10 3232 leaq 0+32(%rsp),%rsi 3233 movq 16+32(%rsp),%r11 3234 movq 24+32(%rsp),%r12 3235 leaq 32(%rsp),%rdi 3236 call __ecp_nistz256_mul_montq 3237 3238 movq 384(%rsp),%rax 3239 leaq 384(%rsp),%rbx 3240 movq 0+64(%rsp),%r9 3241 movq 8+64(%rsp),%r10 3242 leaq 0+64(%rsp),%rsi 3243 movq 16+64(%rsp),%r11 3244 movq 24+64(%rsp),%r12 3245 leaq 288(%rsp),%rdi 3246 call __ecp_nistz256_mul_montq 3247 3248 movq 448(%rsp),%rax 3249 leaq 448(%rsp),%rbx 3250 movq 0+32(%rsp),%r9 3251 movq 8+32(%rsp),%r10 3252 leaq 0+32(%rsp),%rsi 3253 movq 16+32(%rsp),%r11 3254 movq 24+32(%rsp),%r12 3255 leaq 32(%rsp),%rdi 3256 call __ecp_nistz256_mul_montq 3257 3258 leaq 352(%rsp),%rbx 3259 leaq 96(%rsp),%rdi 3260 call __ecp_nistz256_sub_fromq 3261 3262 movq 0+64(%rsp),%rax 3263 movq 8+64(%rsp),%r14 3264 leaq 0+64(%rsp),%rsi 3265 movq 16+64(%rsp),%r15 3266 movq 24+64(%rsp),%r8 3267 leaq 128(%rsp),%rdi 3268 call __ecp_nistz256_sqr_montq 3269 3270 movq 0+96(%rsp),%rax 3271 movq 8+96(%rsp),%r14 3272 leaq 0+96(%rsp),%rsi 3273 movq 16+96(%rsp),%r15 3274 movq 24+96(%rsp),%r8 3275 leaq 192(%rsp),%rdi 3276 call __ecp_nistz256_sqr_montq 3277 3278 movq 128(%rsp),%rax 3279 leaq 128(%rsp),%rbx 3280 movq 0+64(%rsp),%r9 3281 movq 8+64(%rsp),%r10 3282 leaq 0+64(%rsp),%rsi 3283 movq 16+64(%rsp),%r11 3284 movq 24+64(%rsp),%r12 3285 leaq 160(%rsp),%rdi 3286 call __ecp_nistz256_mul_montq 3287 3288 movq 320(%rsp),%rax 3289 leaq 320(%rsp),%rbx 3290 movq 0+128(%rsp),%r9 3291 movq 8+128(%rsp),%r10 3292 leaq 0+128(%rsp),%rsi 3293 movq 16+128(%rsp),%r11 3294 movq 24+128(%rsp),%r12 3295 leaq 0(%rsp),%rdi 3296 call __ecp_nistz256_mul_montq 3297 3298 3299 3300 3301 xorq %r11,%r11 3302 addq %r12,%r12 3303 leaq 192(%rsp),%rsi 3304 adcq %r13,%r13 3305 movq %r12,%rax 3306 adcq %r8,%r8 3307 adcq %r9,%r9 3308 movq %r13,%rbp 3309 adcq $0,%r11 3310 3311 subq $-1,%r12 3312 movq %r8,%rcx 3313 sbbq %r14,%r13 3314 sbbq $0,%r8 3315 movq %r9,%r10 3316 sbbq %r15,%r9 3317 sbbq $0,%r11 3318 3319 cmovcq %rax,%r12 3320 movq 0(%rsi),%rax 3321 cmovcq %rbp,%r13 3322 movq 8(%rsi),%rbp 3323 cmovcq %rcx,%r8 3324 movq 16(%rsi),%rcx 3325 cmovcq %r10,%r9 3326 movq 24(%rsi),%r10 3327 3328 call __ecp_nistz256_subq 3329 3330 leaq 160(%rsp),%rbx 3331 leaq 224(%rsp),%rdi 3332 call __ecp_nistz256_sub_fromq 3333 3334 movq 0+0(%rsp),%rax 3335 movq 0+8(%rsp),%rbp 3336 movq 0+16(%rsp),%rcx 3337 movq 0+24(%rsp),%r10 3338 leaq 64(%rsp),%rdi 3339 3340 call __ecp_nistz256_subq 3341 3342 movq %r12,0(%rdi) 3343 movq %r13,8(%rdi) 3344 movq %r8,16(%rdi) 3345 movq %r9,24(%rdi) 3346 movq 352(%rsp),%rax 3347 leaq 352(%rsp),%rbx 3348 movq 0+160(%rsp),%r9 3349 movq 8+160(%rsp),%r10 3350 leaq 0+160(%rsp),%rsi 3351 movq 16+160(%rsp),%r11 3352 movq 24+160(%rsp),%r12 3353 leaq 32(%rsp),%rdi 3354 call __ecp_nistz256_mul_montq 3355 3356 movq 96(%rsp),%rax 3357 leaq 96(%rsp),%rbx 3358 movq 0+64(%rsp),%r9 3359 movq 8+64(%rsp),%r10 3360 leaq 0+64(%rsp),%rsi 3361 movq 16+64(%rsp),%r11 3362 movq 24+64(%rsp),%r12 3363 leaq 64(%rsp),%rdi 3364 call __ecp_nistz256_mul_montq 3365 3366 leaq 32(%rsp),%rbx 3367 leaq 256(%rsp),%rdi 3368 call __ecp_nistz256_sub_fromq 3369 3370.byte 102,72,15,126,199 3371 3372 movdqa %xmm5,%xmm0 3373 movdqa %xmm5,%xmm1 3374 pandn 288(%rsp),%xmm0 3375 movdqa %xmm5,%xmm2 3376 pandn 288+16(%rsp),%xmm1 3377 movdqa %xmm5,%xmm3 3378 pand .LONE_mont(%rip),%xmm2 3379 pand .LONE_mont+16(%rip),%xmm3 3380 por %xmm0,%xmm2 3381 por %xmm1,%xmm3 3382 3383 movdqa %xmm4,%xmm0 3384 movdqa %xmm4,%xmm1 3385 pandn %xmm2,%xmm0 3386 movdqa %xmm4,%xmm2 3387 pandn %xmm3,%xmm1 3388 movdqa %xmm4,%xmm3 3389 pand 384(%rsp),%xmm2 3390 pand 384+16(%rsp),%xmm3 3391 por %xmm0,%xmm2 3392 por %xmm1,%xmm3 3393 movdqu %xmm2,64(%rdi) 3394 movdqu %xmm3,80(%rdi) 3395 3396 movdqa %xmm5,%xmm0 3397 movdqa %xmm5,%xmm1 3398 pandn 224(%rsp),%xmm0 3399 movdqa %xmm5,%xmm2 3400 pandn 224+16(%rsp),%xmm1 3401 movdqa %xmm5,%xmm3 3402 pand 416(%rsp),%xmm2 3403 pand 416+16(%rsp),%xmm3 3404 por %xmm0,%xmm2 3405 por %xmm1,%xmm3 3406 3407 movdqa %xmm4,%xmm0 3408 movdqa %xmm4,%xmm1 3409 pandn %xmm2,%xmm0 3410 movdqa %xmm4,%xmm2 3411 pandn %xmm3,%xmm1 3412 movdqa %xmm4,%xmm3 3413 pand 320(%rsp),%xmm2 3414 pand 320+16(%rsp),%xmm3 3415 por %xmm0,%xmm2 3416 por %xmm1,%xmm3 3417 movdqu %xmm2,0(%rdi) 3418 movdqu %xmm3,16(%rdi) 3419 3420 movdqa %xmm5,%xmm0 3421 movdqa %xmm5,%xmm1 3422 pandn 256(%rsp),%xmm0 3423 movdqa %xmm5,%xmm2 3424 pandn 256+16(%rsp),%xmm1 3425 movdqa %xmm5,%xmm3 3426 pand 448(%rsp),%xmm2 3427 pand 448+16(%rsp),%xmm3 3428 por %xmm0,%xmm2 3429 por %xmm1,%xmm3 3430 3431 movdqa %xmm4,%xmm0 3432 movdqa %xmm4,%xmm1 3433 pandn %xmm2,%xmm0 3434 movdqa %xmm4,%xmm2 3435 pandn %xmm3,%xmm1 3436 movdqa %xmm4,%xmm3 3437 pand 352(%rsp),%xmm2 3438 pand 352+16(%rsp),%xmm3 3439 por %xmm0,%xmm2 3440 por %xmm1,%xmm3 3441 movdqu %xmm2,32(%rdi) 3442 movdqu %xmm3,48(%rdi) 3443 3444 leaq 480+56(%rsp),%rsi 3445.cfi_def_cfa %rsi,8 3446 movq -48(%rsi),%r15 3447.cfi_restore %r15 3448 movq -40(%rsi),%r14 3449.cfi_restore %r14 3450 movq -32(%rsi),%r13 3451.cfi_restore %r13 3452 movq -24(%rsi),%r12 3453.cfi_restore %r12 3454 movq -16(%rsi),%rbx 3455.cfi_restore %rbx 3456 movq -8(%rsi),%rbp 3457.cfi_restore %rbp 3458 leaq (%rsi),%rsp 3459.cfi_def_cfa_register %rsp 3460.Ladd_affineq_epilogue: 3461 .byte 0xf3,0xc3 3462.cfi_endproc 3463.size GFp_nistz256_point_add_affine,.-GFp_nistz256_point_add_affine 3464.type __ecp_nistz256_add_tox,@function 3465.align 32 3466__ecp_nistz256_add_tox: 3467.cfi_startproc 3468 xorq %r11,%r11 3469 adcq 0(%rbx),%r12 3470 adcq 8(%rbx),%r13 3471 movq %r12,%rax 3472 adcq 16(%rbx),%r8 3473 adcq 24(%rbx),%r9 3474 movq %r13,%rbp 3475 adcq $0,%r11 3476 3477 xorq %r10,%r10 3478 sbbq $-1,%r12 3479 movq %r8,%rcx 3480 sbbq %r14,%r13 3481 sbbq $0,%r8 3482 movq %r9,%r10 3483 sbbq %r15,%r9 3484 sbbq $0,%r11 3485 3486 cmovcq %rax,%r12 3487 cmovcq %rbp,%r13 3488 movq %r12,0(%rdi) 3489 cmovcq %rcx,%r8 3490 movq %r13,8(%rdi) 3491 cmovcq %r10,%r9 3492 movq %r8,16(%rdi) 3493 movq %r9,24(%rdi) 3494 3495 .byte 0xf3,0xc3 3496.cfi_endproc 3497.size __ecp_nistz256_add_tox,.-__ecp_nistz256_add_tox 3498 3499.type __ecp_nistz256_sub_fromx,@function 3500.align 32 3501__ecp_nistz256_sub_fromx: 3502.cfi_startproc 3503 xorq %r11,%r11 3504 sbbq 0(%rbx),%r12 3505 sbbq 8(%rbx),%r13 3506 movq %r12,%rax 3507 sbbq 16(%rbx),%r8 3508 sbbq 24(%rbx),%r9 3509 movq %r13,%rbp 3510 sbbq $0,%r11 3511 3512 xorq %r10,%r10 3513 adcq $-1,%r12 3514 movq %r8,%rcx 3515 adcq %r14,%r13 3516 adcq $0,%r8 3517 movq %r9,%r10 3518 adcq %r15,%r9 3519 3520 btq $0,%r11 3521 cmovncq %rax,%r12 3522 cmovncq %rbp,%r13 3523 movq %r12,0(%rdi) 3524 cmovncq %rcx,%r8 3525 movq %r13,8(%rdi) 3526 cmovncq %r10,%r9 3527 movq %r8,16(%rdi) 3528 movq %r9,24(%rdi) 3529 3530 .byte 0xf3,0xc3 3531.cfi_endproc 3532.size __ecp_nistz256_sub_fromx,.-__ecp_nistz256_sub_fromx 3533 3534.type __ecp_nistz256_subx,@function 3535.align 32 3536__ecp_nistz256_subx: 3537.cfi_startproc 3538 xorq %r11,%r11 3539 sbbq %r12,%rax 3540 sbbq %r13,%rbp 3541 movq %rax,%r12 3542 sbbq %r8,%rcx 3543 sbbq %r9,%r10 3544 movq %rbp,%r13 3545 sbbq $0,%r11 3546 3547 xorq %r9,%r9 3548 adcq $-1,%rax 3549 movq %rcx,%r8 3550 adcq %r14,%rbp 3551 adcq $0,%rcx 3552 movq %r10,%r9 3553 adcq %r15,%r10 3554 3555 btq $0,%r11 3556 cmovcq %rax,%r12 3557 cmovcq %rbp,%r13 3558 cmovcq %rcx,%r8 3559 cmovcq %r10,%r9 3560 3561 .byte 0xf3,0xc3 3562.cfi_endproc 3563.size __ecp_nistz256_subx,.-__ecp_nistz256_subx 3564 3565.type __ecp_nistz256_mul_by_2x,@function 3566.align 32 3567__ecp_nistz256_mul_by_2x: 3568.cfi_startproc 3569 xorq %r11,%r11 3570 adcq %r12,%r12 3571 adcq %r13,%r13 3572 movq %r12,%rax 3573 adcq %r8,%r8 3574 adcq %r9,%r9 3575 movq %r13,%rbp 3576 adcq $0,%r11 3577 3578 xorq %r10,%r10 3579 sbbq $-1,%r12 3580 movq %r8,%rcx 3581 sbbq %r14,%r13 3582 sbbq $0,%r8 3583 movq %r9,%r10 3584 sbbq %r15,%r9 3585 sbbq $0,%r11 3586 3587 cmovcq %rax,%r12 3588 cmovcq %rbp,%r13 3589 movq %r12,0(%rdi) 3590 cmovcq %rcx,%r8 3591 movq %r13,8(%rdi) 3592 cmovcq %r10,%r9 3593 movq %r8,16(%rdi) 3594 movq %r9,24(%rdi) 3595 3596 .byte 0xf3,0xc3 3597.cfi_endproc 3598.size __ecp_nistz256_mul_by_2x,.-__ecp_nistz256_mul_by_2x 3599.type GFp_nistz256_point_doublex,@function 3600.align 32 3601GFp_nistz256_point_doublex: 3602.cfi_startproc 3603.Lpoint_doublex: 3604 pushq %rbp 3605.cfi_adjust_cfa_offset 8 3606.cfi_offset %rbp,-16 3607 pushq %rbx 3608.cfi_adjust_cfa_offset 8 3609.cfi_offset %rbx,-24 3610 pushq %r12 3611.cfi_adjust_cfa_offset 8 3612.cfi_offset %r12,-32 3613 pushq %r13 3614.cfi_adjust_cfa_offset 8 3615.cfi_offset %r13,-40 3616 pushq %r14 3617.cfi_adjust_cfa_offset 8 3618.cfi_offset %r14,-48 3619 pushq %r15 3620.cfi_adjust_cfa_offset 8 3621.cfi_offset %r15,-56 3622 subq $160+8,%rsp 3623.cfi_adjust_cfa_offset 32*5+8 3624.Lpoint_doublex_body: 3625 3626.Lpoint_double_shortcutx: 3627 movdqu 0(%rsi),%xmm0 3628 movq %rsi,%rbx 3629 movdqu 16(%rsi),%xmm1 3630 movq 32+0(%rsi),%r12 3631 movq 32+8(%rsi),%r13 3632 movq 32+16(%rsi),%r8 3633 movq 32+24(%rsi),%r9 3634 movq .Lpoly+8(%rip),%r14 3635 movq .Lpoly+24(%rip),%r15 3636 movdqa %xmm0,96(%rsp) 3637 movdqa %xmm1,96+16(%rsp) 3638 leaq 32(%rdi),%r10 3639 leaq 64(%rdi),%r11 3640.byte 102,72,15,110,199 3641.byte 102,73,15,110,202 3642.byte 102,73,15,110,211 3643 3644 leaq 0(%rsp),%rdi 3645 call __ecp_nistz256_mul_by_2x 3646 3647 movq 64+0(%rsi),%rdx 3648 movq 64+8(%rsi),%r14 3649 movq 64+16(%rsi),%r15 3650 movq 64+24(%rsi),%r8 3651 leaq 64-128(%rsi),%rsi 3652 leaq 64(%rsp),%rdi 3653 call __ecp_nistz256_sqr_montx 3654 3655 movq 0+0(%rsp),%rdx 3656 movq 8+0(%rsp),%r14 3657 leaq -128+0(%rsp),%rsi 3658 movq 16+0(%rsp),%r15 3659 movq 24+0(%rsp),%r8 3660 leaq 0(%rsp),%rdi 3661 call __ecp_nistz256_sqr_montx 3662 3663 movq 32(%rbx),%rdx 3664 movq 64+0(%rbx),%r9 3665 movq 64+8(%rbx),%r10 3666 movq 64+16(%rbx),%r11 3667 movq 64+24(%rbx),%r12 3668 leaq 64-128(%rbx),%rsi 3669 leaq 32(%rbx),%rbx 3670.byte 102,72,15,126,215 3671 call __ecp_nistz256_mul_montx 3672 call __ecp_nistz256_mul_by_2x 3673 3674 movq 96+0(%rsp),%r12 3675 movq 96+8(%rsp),%r13 3676 leaq 64(%rsp),%rbx 3677 movq 96+16(%rsp),%r8 3678 movq 96+24(%rsp),%r9 3679 leaq 32(%rsp),%rdi 3680 call __ecp_nistz256_add_tox 3681 3682 movq 96+0(%rsp),%r12 3683 movq 96+8(%rsp),%r13 3684 leaq 64(%rsp),%rbx 3685 movq 96+16(%rsp),%r8 3686 movq 96+24(%rsp),%r9 3687 leaq 64(%rsp),%rdi 3688 call __ecp_nistz256_sub_fromx 3689 3690 movq 0+0(%rsp),%rdx 3691 movq 8+0(%rsp),%r14 3692 leaq -128+0(%rsp),%rsi 3693 movq 16+0(%rsp),%r15 3694 movq 24+0(%rsp),%r8 3695.byte 102,72,15,126,207 3696 call __ecp_nistz256_sqr_montx 3697 xorq %r9,%r9 3698 movq %r12,%rax 3699 addq $-1,%r12 3700 movq %r13,%r10 3701 adcq %rsi,%r13 3702 movq %r14,%rcx 3703 adcq $0,%r14 3704 movq %r15,%r8 3705 adcq %rbp,%r15 3706 adcq $0,%r9 3707 xorq %rsi,%rsi 3708 testq $1,%rax 3709 3710 cmovzq %rax,%r12 3711 cmovzq %r10,%r13 3712 cmovzq %rcx,%r14 3713 cmovzq %r8,%r15 3714 cmovzq %rsi,%r9 3715 3716 movq %r13,%rax 3717 shrq $1,%r12 3718 shlq $63,%rax 3719 movq %r14,%r10 3720 shrq $1,%r13 3721 orq %rax,%r12 3722 shlq $63,%r10 3723 movq %r15,%rcx 3724 shrq $1,%r14 3725 orq %r10,%r13 3726 shlq $63,%rcx 3727 movq %r12,0(%rdi) 3728 shrq $1,%r15 3729 movq %r13,8(%rdi) 3730 shlq $63,%r9 3731 orq %rcx,%r14 3732 orq %r9,%r15 3733 movq %r14,16(%rdi) 3734 movq %r15,24(%rdi) 3735 movq 64(%rsp),%rdx 3736 leaq 64(%rsp),%rbx 3737 movq 0+32(%rsp),%r9 3738 movq 8+32(%rsp),%r10 3739 leaq -128+32(%rsp),%rsi 3740 movq 16+32(%rsp),%r11 3741 movq 24+32(%rsp),%r12 3742 leaq 32(%rsp),%rdi 3743 call __ecp_nistz256_mul_montx 3744 3745 leaq 128(%rsp),%rdi 3746 call __ecp_nistz256_mul_by_2x 3747 3748 leaq 32(%rsp),%rbx 3749 leaq 32(%rsp),%rdi 3750 call __ecp_nistz256_add_tox 3751 3752 movq 96(%rsp),%rdx 3753 leaq 96(%rsp),%rbx 3754 movq 0+0(%rsp),%r9 3755 movq 8+0(%rsp),%r10 3756 leaq -128+0(%rsp),%rsi 3757 movq 16+0(%rsp),%r11 3758 movq 24+0(%rsp),%r12 3759 leaq 0(%rsp),%rdi 3760 call __ecp_nistz256_mul_montx 3761 3762 leaq 128(%rsp),%rdi 3763 call __ecp_nistz256_mul_by_2x 3764 3765 movq 0+32(%rsp),%rdx 3766 movq 8+32(%rsp),%r14 3767 leaq -128+32(%rsp),%rsi 3768 movq 16+32(%rsp),%r15 3769 movq 24+32(%rsp),%r8 3770.byte 102,72,15,126,199 3771 call __ecp_nistz256_sqr_montx 3772 3773 leaq 128(%rsp),%rbx 3774 movq %r14,%r8 3775 movq %r15,%r9 3776 movq %rsi,%r14 3777 movq %rbp,%r15 3778 call __ecp_nistz256_sub_fromx 3779 3780 movq 0+0(%rsp),%rax 3781 movq 0+8(%rsp),%rbp 3782 movq 0+16(%rsp),%rcx 3783 movq 0+24(%rsp),%r10 3784 leaq 0(%rsp),%rdi 3785 call __ecp_nistz256_subx 3786 3787 movq 32(%rsp),%rdx 3788 leaq 32(%rsp),%rbx 3789 movq %r12,%r14 3790 xorl %ecx,%ecx 3791 movq %r12,0+0(%rsp) 3792 movq %r13,%r10 3793 movq %r13,0+8(%rsp) 3794 cmovzq %r8,%r11 3795 movq %r8,0+16(%rsp) 3796 leaq 0-128(%rsp),%rsi 3797 cmovzq %r9,%r12 3798 movq %r9,0+24(%rsp) 3799 movq %r14,%r9 3800 leaq 0(%rsp),%rdi 3801 call __ecp_nistz256_mul_montx 3802 3803.byte 102,72,15,126,203 3804.byte 102,72,15,126,207 3805 call __ecp_nistz256_sub_fromx 3806 3807 leaq 160+56(%rsp),%rsi 3808.cfi_def_cfa %rsi,8 3809 movq -48(%rsi),%r15 3810.cfi_restore %r15 3811 movq -40(%rsi),%r14 3812.cfi_restore %r14 3813 movq -32(%rsi),%r13 3814.cfi_restore %r13 3815 movq -24(%rsi),%r12 3816.cfi_restore %r12 3817 movq -16(%rsi),%rbx 3818.cfi_restore %rbx 3819 movq -8(%rsi),%rbp 3820.cfi_restore %rbp 3821 leaq (%rsi),%rsp 3822.cfi_def_cfa_register %rsp 3823.Lpoint_doublex_epilogue: 3824 .byte 0xf3,0xc3 3825.cfi_endproc 3826.size GFp_nistz256_point_doublex,.-GFp_nistz256_point_doublex 3827.type GFp_nistz256_point_addx,@function 3828.align 32 3829GFp_nistz256_point_addx: 3830.cfi_startproc 3831.Lpoint_addx: 3832 pushq %rbp 3833.cfi_adjust_cfa_offset 8 3834.cfi_offset %rbp,-16 3835 pushq %rbx 3836.cfi_adjust_cfa_offset 8 3837.cfi_offset %rbx,-24 3838 pushq %r12 3839.cfi_adjust_cfa_offset 8 3840.cfi_offset %r12,-32 3841 pushq %r13 3842.cfi_adjust_cfa_offset 8 3843.cfi_offset %r13,-40 3844 pushq %r14 3845.cfi_adjust_cfa_offset 8 3846.cfi_offset %r14,-48 3847 pushq %r15 3848.cfi_adjust_cfa_offset 8 3849.cfi_offset %r15,-56 3850 subq $576+8,%rsp 3851.cfi_adjust_cfa_offset 32*18+8 3852.Lpoint_addx_body: 3853 3854 movdqu 0(%rsi),%xmm0 3855 movdqu 16(%rsi),%xmm1 3856 movdqu 32(%rsi),%xmm2 3857 movdqu 48(%rsi),%xmm3 3858 movdqu 64(%rsi),%xmm4 3859 movdqu 80(%rsi),%xmm5 3860 movq %rsi,%rbx 3861 movq %rdx,%rsi 3862 movdqa %xmm0,384(%rsp) 3863 movdqa %xmm1,384+16(%rsp) 3864 movdqa %xmm2,416(%rsp) 3865 movdqa %xmm3,416+16(%rsp) 3866 movdqa %xmm4,448(%rsp) 3867 movdqa %xmm5,448+16(%rsp) 3868 por %xmm4,%xmm5 3869 3870 movdqu 0(%rsi),%xmm0 3871 pshufd $0xb1,%xmm5,%xmm3 3872 movdqu 16(%rsi),%xmm1 3873 movdqu 32(%rsi),%xmm2 3874 por %xmm3,%xmm5 3875 movdqu 48(%rsi),%xmm3 3876 movq 64+0(%rsi),%rdx 3877 movq 64+8(%rsi),%r14 3878 movq 64+16(%rsi),%r15 3879 movq 64+24(%rsi),%r8 3880 movdqa %xmm0,480(%rsp) 3881 pshufd $0x1e,%xmm5,%xmm4 3882 movdqa %xmm1,480+16(%rsp) 3883 movdqu 64(%rsi),%xmm0 3884 movdqu 80(%rsi),%xmm1 3885 movdqa %xmm2,512(%rsp) 3886 movdqa %xmm3,512+16(%rsp) 3887 por %xmm4,%xmm5 3888 pxor %xmm4,%xmm4 3889 por %xmm0,%xmm1 3890.byte 102,72,15,110,199 3891 3892 leaq 64-128(%rsi),%rsi 3893 movq %rdx,544+0(%rsp) 3894 movq %r14,544+8(%rsp) 3895 movq %r15,544+16(%rsp) 3896 movq %r8,544+24(%rsp) 3897 leaq 96(%rsp),%rdi 3898 call __ecp_nistz256_sqr_montx 3899 3900 pcmpeqd %xmm4,%xmm5 3901 pshufd $0xb1,%xmm1,%xmm4 3902 por %xmm1,%xmm4 3903 pshufd $0,%xmm5,%xmm5 3904 pshufd $0x1e,%xmm4,%xmm3 3905 por %xmm3,%xmm4 3906 pxor %xmm3,%xmm3 3907 pcmpeqd %xmm3,%xmm4 3908 pshufd $0,%xmm4,%xmm4 3909 movq 64+0(%rbx),%rdx 3910 movq 64+8(%rbx),%r14 3911 movq 64+16(%rbx),%r15 3912 movq 64+24(%rbx),%r8 3913.byte 102,72,15,110,203 3914 3915 leaq 64-128(%rbx),%rsi 3916 leaq 32(%rsp),%rdi 3917 call __ecp_nistz256_sqr_montx 3918 3919 movq 544(%rsp),%rdx 3920 leaq 544(%rsp),%rbx 3921 movq 0+96(%rsp),%r9 3922 movq 8+96(%rsp),%r10 3923 leaq -128+96(%rsp),%rsi 3924 movq 16+96(%rsp),%r11 3925 movq 24+96(%rsp),%r12 3926 leaq 224(%rsp),%rdi 3927 call __ecp_nistz256_mul_montx 3928 3929 movq 448(%rsp),%rdx 3930 leaq 448(%rsp),%rbx 3931 movq 0+32(%rsp),%r9 3932 movq 8+32(%rsp),%r10 3933 leaq -128+32(%rsp),%rsi 3934 movq 16+32(%rsp),%r11 3935 movq 24+32(%rsp),%r12 3936 leaq 256(%rsp),%rdi 3937 call __ecp_nistz256_mul_montx 3938 3939 movq 416(%rsp),%rdx 3940 leaq 416(%rsp),%rbx 3941 movq 0+224(%rsp),%r9 3942 movq 8+224(%rsp),%r10 3943 leaq -128+224(%rsp),%rsi 3944 movq 16+224(%rsp),%r11 3945 movq 24+224(%rsp),%r12 3946 leaq 224(%rsp),%rdi 3947 call __ecp_nistz256_mul_montx 3948 3949 movq 512(%rsp),%rdx 3950 leaq 512(%rsp),%rbx 3951 movq 0+256(%rsp),%r9 3952 movq 8+256(%rsp),%r10 3953 leaq -128+256(%rsp),%rsi 3954 movq 16+256(%rsp),%r11 3955 movq 24+256(%rsp),%r12 3956 leaq 256(%rsp),%rdi 3957 call __ecp_nistz256_mul_montx 3958 3959 leaq 224(%rsp),%rbx 3960 leaq 64(%rsp),%rdi 3961 call __ecp_nistz256_sub_fromx 3962 3963 orq %r13,%r12 3964 movdqa %xmm4,%xmm2 3965 orq %r8,%r12 3966 orq %r9,%r12 3967 por %xmm5,%xmm2 3968.byte 102,73,15,110,220 3969 3970 movq 384(%rsp),%rdx 3971 leaq 384(%rsp),%rbx 3972 movq 0+96(%rsp),%r9 3973 movq 8+96(%rsp),%r10 3974 leaq -128+96(%rsp),%rsi 3975 movq 16+96(%rsp),%r11 3976 movq 24+96(%rsp),%r12 3977 leaq 160(%rsp),%rdi 3978 call __ecp_nistz256_mul_montx 3979 3980 movq 480(%rsp),%rdx 3981 leaq 480(%rsp),%rbx 3982 movq 0+32(%rsp),%r9 3983 movq 8+32(%rsp),%r10 3984 leaq -128+32(%rsp),%rsi 3985 movq 16+32(%rsp),%r11 3986 movq 24+32(%rsp),%r12 3987 leaq 192(%rsp),%rdi 3988 call __ecp_nistz256_mul_montx 3989 3990 leaq 160(%rsp),%rbx 3991 leaq 0(%rsp),%rdi 3992 call __ecp_nistz256_sub_fromx 3993 3994 orq %r13,%r12 3995 orq %r8,%r12 3996 orq %r9,%r12 3997 3998.byte 102,73,15,126,208 3999.byte 102,73,15,126,217 4000 orq %r8,%r12 4001.byte 0x3e 4002 jnz .Ladd_proceedx 4003 4004 4005 4006 testq %r9,%r9 4007 jz .Ladd_doublex 4008 4009 4010 4011 4012 4013 4014.byte 102,72,15,126,199 4015 pxor %xmm0,%xmm0 4016 movdqu %xmm0,0(%rdi) 4017 movdqu %xmm0,16(%rdi) 4018 movdqu %xmm0,32(%rdi) 4019 movdqu %xmm0,48(%rdi) 4020 movdqu %xmm0,64(%rdi) 4021 movdqu %xmm0,80(%rdi) 4022 jmp .Ladd_donex 4023 4024.align 32 4025.Ladd_doublex: 4026.byte 102,72,15,126,206 4027.byte 102,72,15,126,199 4028 addq $416,%rsp 4029.cfi_adjust_cfa_offset -416 4030 jmp .Lpoint_double_shortcutx 4031.cfi_adjust_cfa_offset 416 4032 4033.align 32 4034.Ladd_proceedx: 4035 movq 0+64(%rsp),%rdx 4036 movq 8+64(%rsp),%r14 4037 leaq -128+64(%rsp),%rsi 4038 movq 16+64(%rsp),%r15 4039 movq 24+64(%rsp),%r8 4040 leaq 96(%rsp),%rdi 4041 call __ecp_nistz256_sqr_montx 4042 4043 movq 448(%rsp),%rdx 4044 leaq 448(%rsp),%rbx 4045 movq 0+0(%rsp),%r9 4046 movq 8+0(%rsp),%r10 4047 leaq -128+0(%rsp),%rsi 4048 movq 16+0(%rsp),%r11 4049 movq 24+0(%rsp),%r12 4050 leaq 352(%rsp),%rdi 4051 call __ecp_nistz256_mul_montx 4052 4053 movq 0+0(%rsp),%rdx 4054 movq 8+0(%rsp),%r14 4055 leaq -128+0(%rsp),%rsi 4056 movq 16+0(%rsp),%r15 4057 movq 24+0(%rsp),%r8 4058 leaq 32(%rsp),%rdi 4059 call __ecp_nistz256_sqr_montx 4060 4061 movq 544(%rsp),%rdx 4062 leaq 544(%rsp),%rbx 4063 movq 0+352(%rsp),%r9 4064 movq 8+352(%rsp),%r10 4065 leaq -128+352(%rsp),%rsi 4066 movq 16+352(%rsp),%r11 4067 movq 24+352(%rsp),%r12 4068 leaq 352(%rsp),%rdi 4069 call __ecp_nistz256_mul_montx 4070 4071 movq 0(%rsp),%rdx 4072 leaq 0(%rsp),%rbx 4073 movq 0+32(%rsp),%r9 4074 movq 8+32(%rsp),%r10 4075 leaq -128+32(%rsp),%rsi 4076 movq 16+32(%rsp),%r11 4077 movq 24+32(%rsp),%r12 4078 leaq 128(%rsp),%rdi 4079 call __ecp_nistz256_mul_montx 4080 4081 movq 160(%rsp),%rdx 4082 leaq 160(%rsp),%rbx 4083 movq 0+32(%rsp),%r9 4084 movq 8+32(%rsp),%r10 4085 leaq -128+32(%rsp),%rsi 4086 movq 16+32(%rsp),%r11 4087 movq 24+32(%rsp),%r12 4088 leaq 192(%rsp),%rdi 4089 call __ecp_nistz256_mul_montx 4090 4091 4092 4093 4094 xorq %r11,%r11 4095 addq %r12,%r12 4096 leaq 96(%rsp),%rsi 4097 adcq %r13,%r13 4098 movq %r12,%rax 4099 adcq %r8,%r8 4100 adcq %r9,%r9 4101 movq %r13,%rbp 4102 adcq $0,%r11 4103 4104 subq $-1,%r12 4105 movq %r8,%rcx 4106 sbbq %r14,%r13 4107 sbbq $0,%r8 4108 movq %r9,%r10 4109 sbbq %r15,%r9 4110 sbbq $0,%r11 4111 4112 cmovcq %rax,%r12 4113 movq 0(%rsi),%rax 4114 cmovcq %rbp,%r13 4115 movq 8(%rsi),%rbp 4116 cmovcq %rcx,%r8 4117 movq 16(%rsi),%rcx 4118 cmovcq %r10,%r9 4119 movq 24(%rsi),%r10 4120 4121 call __ecp_nistz256_subx 4122 4123 leaq 128(%rsp),%rbx 4124 leaq 288(%rsp),%rdi 4125 call __ecp_nistz256_sub_fromx 4126 4127 movq 192+0(%rsp),%rax 4128 movq 192+8(%rsp),%rbp 4129 movq 192+16(%rsp),%rcx 4130 movq 192+24(%rsp),%r10 4131 leaq 320(%rsp),%rdi 4132 4133 call __ecp_nistz256_subx 4134 4135 movq %r12,0(%rdi) 4136 movq %r13,8(%rdi) 4137 movq %r8,16(%rdi) 4138 movq %r9,24(%rdi) 4139 movq 128(%rsp),%rdx 4140 leaq 128(%rsp),%rbx 4141 movq 0+224(%rsp),%r9 4142 movq 8+224(%rsp),%r10 4143 leaq -128+224(%rsp),%rsi 4144 movq 16+224(%rsp),%r11 4145 movq 24+224(%rsp),%r12 4146 leaq 256(%rsp),%rdi 4147 call __ecp_nistz256_mul_montx 4148 4149 movq 320(%rsp),%rdx 4150 leaq 320(%rsp),%rbx 4151 movq 0+64(%rsp),%r9 4152 movq 8+64(%rsp),%r10 4153 leaq -128+64(%rsp),%rsi 4154 movq 16+64(%rsp),%r11 4155 movq 24+64(%rsp),%r12 4156 leaq 320(%rsp),%rdi 4157 call __ecp_nistz256_mul_montx 4158 4159 leaq 256(%rsp),%rbx 4160 leaq 320(%rsp),%rdi 4161 call __ecp_nistz256_sub_fromx 4162 4163.byte 102,72,15,126,199 4164 4165 movdqa %xmm5,%xmm0 4166 movdqa %xmm5,%xmm1 4167 pandn 352(%rsp),%xmm0 4168 movdqa %xmm5,%xmm2 4169 pandn 352+16(%rsp),%xmm1 4170 movdqa %xmm5,%xmm3 4171 pand 544(%rsp),%xmm2 4172 pand 544+16(%rsp),%xmm3 4173 por %xmm0,%xmm2 4174 por %xmm1,%xmm3 4175 4176 movdqa %xmm4,%xmm0 4177 movdqa %xmm4,%xmm1 4178 pandn %xmm2,%xmm0 4179 movdqa %xmm4,%xmm2 4180 pandn %xmm3,%xmm1 4181 movdqa %xmm4,%xmm3 4182 pand 448(%rsp),%xmm2 4183 pand 448+16(%rsp),%xmm3 4184 por %xmm0,%xmm2 4185 por %xmm1,%xmm3 4186 movdqu %xmm2,64(%rdi) 4187 movdqu %xmm3,80(%rdi) 4188 4189 movdqa %xmm5,%xmm0 4190 movdqa %xmm5,%xmm1 4191 pandn 288(%rsp),%xmm0 4192 movdqa %xmm5,%xmm2 4193 pandn 288+16(%rsp),%xmm1 4194 movdqa %xmm5,%xmm3 4195 pand 480(%rsp),%xmm2 4196 pand 480+16(%rsp),%xmm3 4197 por %xmm0,%xmm2 4198 por %xmm1,%xmm3 4199 4200 movdqa %xmm4,%xmm0 4201 movdqa %xmm4,%xmm1 4202 pandn %xmm2,%xmm0 4203 movdqa %xmm4,%xmm2 4204 pandn %xmm3,%xmm1 4205 movdqa %xmm4,%xmm3 4206 pand 384(%rsp),%xmm2 4207 pand 384+16(%rsp),%xmm3 4208 por %xmm0,%xmm2 4209 por %xmm1,%xmm3 4210 movdqu %xmm2,0(%rdi) 4211 movdqu %xmm3,16(%rdi) 4212 4213 movdqa %xmm5,%xmm0 4214 movdqa %xmm5,%xmm1 4215 pandn 320(%rsp),%xmm0 4216 movdqa %xmm5,%xmm2 4217 pandn 320+16(%rsp),%xmm1 4218 movdqa %xmm5,%xmm3 4219 pand 512(%rsp),%xmm2 4220 pand 512+16(%rsp),%xmm3 4221 por %xmm0,%xmm2 4222 por %xmm1,%xmm3 4223 4224 movdqa %xmm4,%xmm0 4225 movdqa %xmm4,%xmm1 4226 pandn %xmm2,%xmm0 4227 movdqa %xmm4,%xmm2 4228 pandn %xmm3,%xmm1 4229 movdqa %xmm4,%xmm3 4230 pand 416(%rsp),%xmm2 4231 pand 416+16(%rsp),%xmm3 4232 por %xmm0,%xmm2 4233 por %xmm1,%xmm3 4234 movdqu %xmm2,32(%rdi) 4235 movdqu %xmm3,48(%rdi) 4236 4237.Ladd_donex: 4238 leaq 576+56(%rsp),%rsi 4239.cfi_def_cfa %rsi,8 4240 movq -48(%rsi),%r15 4241.cfi_restore %r15 4242 movq -40(%rsi),%r14 4243.cfi_restore %r14 4244 movq -32(%rsi),%r13 4245.cfi_restore %r13 4246 movq -24(%rsi),%r12 4247.cfi_restore %r12 4248 movq -16(%rsi),%rbx 4249.cfi_restore %rbx 4250 movq -8(%rsi),%rbp 4251.cfi_restore %rbp 4252 leaq (%rsi),%rsp 4253.cfi_def_cfa_register %rsp 4254.Lpoint_addx_epilogue: 4255 .byte 0xf3,0xc3 4256.cfi_endproc 4257.size GFp_nistz256_point_addx,.-GFp_nistz256_point_addx 4258.type GFp_nistz256_point_add_affinex,@function 4259.align 32 4260GFp_nistz256_point_add_affinex: 4261.cfi_startproc 4262.Lpoint_add_affinex: 4263 pushq %rbp 4264.cfi_adjust_cfa_offset 8 4265.cfi_offset %rbp,-16 4266 pushq %rbx 4267.cfi_adjust_cfa_offset 8 4268.cfi_offset %rbx,-24 4269 pushq %r12 4270.cfi_adjust_cfa_offset 8 4271.cfi_offset %r12,-32 4272 pushq %r13 4273.cfi_adjust_cfa_offset 8 4274.cfi_offset %r13,-40 4275 pushq %r14 4276.cfi_adjust_cfa_offset 8 4277.cfi_offset %r14,-48 4278 pushq %r15 4279.cfi_adjust_cfa_offset 8 4280.cfi_offset %r15,-56 4281 subq $480+8,%rsp 4282.cfi_adjust_cfa_offset 32*15+8 4283.Ladd_affinex_body: 4284 4285 movdqu 0(%rsi),%xmm0 4286 movq %rdx,%rbx 4287 movdqu 16(%rsi),%xmm1 4288 movdqu 32(%rsi),%xmm2 4289 movdqu 48(%rsi),%xmm3 4290 movdqu 64(%rsi),%xmm4 4291 movdqu 80(%rsi),%xmm5 4292 movq 64+0(%rsi),%rdx 4293 movq 64+8(%rsi),%r14 4294 movq 64+16(%rsi),%r15 4295 movq 64+24(%rsi),%r8 4296 movdqa %xmm0,320(%rsp) 4297 movdqa %xmm1,320+16(%rsp) 4298 movdqa %xmm2,352(%rsp) 4299 movdqa %xmm3,352+16(%rsp) 4300 movdqa %xmm4,384(%rsp) 4301 movdqa %xmm5,384+16(%rsp) 4302 por %xmm4,%xmm5 4303 4304 movdqu 0(%rbx),%xmm0 4305 pshufd $0xb1,%xmm5,%xmm3 4306 movdqu 16(%rbx),%xmm1 4307 movdqu 32(%rbx),%xmm2 4308 por %xmm3,%xmm5 4309 movdqu 48(%rbx),%xmm3 4310 movdqa %xmm0,416(%rsp) 4311 pshufd $0x1e,%xmm5,%xmm4 4312 movdqa %xmm1,416+16(%rsp) 4313 por %xmm0,%xmm1 4314.byte 102,72,15,110,199 4315 movdqa %xmm2,448(%rsp) 4316 movdqa %xmm3,448+16(%rsp) 4317 por %xmm2,%xmm3 4318 por %xmm4,%xmm5 4319 pxor %xmm4,%xmm4 4320 por %xmm1,%xmm3 4321 4322 leaq 64-128(%rsi),%rsi 4323 leaq 32(%rsp),%rdi 4324 call __ecp_nistz256_sqr_montx 4325 4326 pcmpeqd %xmm4,%xmm5 4327 pshufd $0xb1,%xmm3,%xmm4 4328 movq 0(%rbx),%rdx 4329 4330 movq %r12,%r9 4331 por %xmm3,%xmm4 4332 pshufd $0,%xmm5,%xmm5 4333 pshufd $0x1e,%xmm4,%xmm3 4334 movq %r13,%r10 4335 por %xmm3,%xmm4 4336 pxor %xmm3,%xmm3 4337 movq %r14,%r11 4338 pcmpeqd %xmm3,%xmm4 4339 pshufd $0,%xmm4,%xmm4 4340 4341 leaq 32-128(%rsp),%rsi 4342 movq %r15,%r12 4343 leaq 0(%rsp),%rdi 4344 call __ecp_nistz256_mul_montx 4345 4346 leaq 320(%rsp),%rbx 4347 leaq 64(%rsp),%rdi 4348 call __ecp_nistz256_sub_fromx 4349 4350 movq 384(%rsp),%rdx 4351 leaq 384(%rsp),%rbx 4352 movq 0+32(%rsp),%r9 4353 movq 8+32(%rsp),%r10 4354 leaq -128+32(%rsp),%rsi 4355 movq 16+32(%rsp),%r11 4356 movq 24+32(%rsp),%r12 4357 leaq 32(%rsp),%rdi 4358 call __ecp_nistz256_mul_montx 4359 4360 movq 384(%rsp),%rdx 4361 leaq 384(%rsp),%rbx 4362 movq 0+64(%rsp),%r9 4363 movq 8+64(%rsp),%r10 4364 leaq -128+64(%rsp),%rsi 4365 movq 16+64(%rsp),%r11 4366 movq 24+64(%rsp),%r12 4367 leaq 288(%rsp),%rdi 4368 call __ecp_nistz256_mul_montx 4369 4370 movq 448(%rsp),%rdx 4371 leaq 448(%rsp),%rbx 4372 movq 0+32(%rsp),%r9 4373 movq 8+32(%rsp),%r10 4374 leaq -128+32(%rsp),%rsi 4375 movq 16+32(%rsp),%r11 4376 movq 24+32(%rsp),%r12 4377 leaq 32(%rsp),%rdi 4378 call __ecp_nistz256_mul_montx 4379 4380 leaq 352(%rsp),%rbx 4381 leaq 96(%rsp),%rdi 4382 call __ecp_nistz256_sub_fromx 4383 4384 movq 0+64(%rsp),%rdx 4385 movq 8+64(%rsp),%r14 4386 leaq -128+64(%rsp),%rsi 4387 movq 16+64(%rsp),%r15 4388 movq 24+64(%rsp),%r8 4389 leaq 128(%rsp),%rdi 4390 call __ecp_nistz256_sqr_montx 4391 4392 movq 0+96(%rsp),%rdx 4393 movq 8+96(%rsp),%r14 4394 leaq -128+96(%rsp),%rsi 4395 movq 16+96(%rsp),%r15 4396 movq 24+96(%rsp),%r8 4397 leaq 192(%rsp),%rdi 4398 call __ecp_nistz256_sqr_montx 4399 4400 movq 128(%rsp),%rdx 4401 leaq 128(%rsp),%rbx 4402 movq 0+64(%rsp),%r9 4403 movq 8+64(%rsp),%r10 4404 leaq -128+64(%rsp),%rsi 4405 movq 16+64(%rsp),%r11 4406 movq 24+64(%rsp),%r12 4407 leaq 160(%rsp),%rdi 4408 call __ecp_nistz256_mul_montx 4409 4410 movq 320(%rsp),%rdx 4411 leaq 320(%rsp),%rbx 4412 movq 0+128(%rsp),%r9 4413 movq 8+128(%rsp),%r10 4414 leaq -128+128(%rsp),%rsi 4415 movq 16+128(%rsp),%r11 4416 movq 24+128(%rsp),%r12 4417 leaq 0(%rsp),%rdi 4418 call __ecp_nistz256_mul_montx 4419 4420 4421 4422 4423 xorq %r11,%r11 4424 addq %r12,%r12 4425 leaq 192(%rsp),%rsi 4426 adcq %r13,%r13 4427 movq %r12,%rax 4428 adcq %r8,%r8 4429 adcq %r9,%r9 4430 movq %r13,%rbp 4431 adcq $0,%r11 4432 4433 subq $-1,%r12 4434 movq %r8,%rcx 4435 sbbq %r14,%r13 4436 sbbq $0,%r8 4437 movq %r9,%r10 4438 sbbq %r15,%r9 4439 sbbq $0,%r11 4440 4441 cmovcq %rax,%r12 4442 movq 0(%rsi),%rax 4443 cmovcq %rbp,%r13 4444 movq 8(%rsi),%rbp 4445 cmovcq %rcx,%r8 4446 movq 16(%rsi),%rcx 4447 cmovcq %r10,%r9 4448 movq 24(%rsi),%r10 4449 4450 call __ecp_nistz256_subx 4451 4452 leaq 160(%rsp),%rbx 4453 leaq 224(%rsp),%rdi 4454 call __ecp_nistz256_sub_fromx 4455 4456 movq 0+0(%rsp),%rax 4457 movq 0+8(%rsp),%rbp 4458 movq 0+16(%rsp),%rcx 4459 movq 0+24(%rsp),%r10 4460 leaq 64(%rsp),%rdi 4461 4462 call __ecp_nistz256_subx 4463 4464 movq %r12,0(%rdi) 4465 movq %r13,8(%rdi) 4466 movq %r8,16(%rdi) 4467 movq %r9,24(%rdi) 4468 movq 352(%rsp),%rdx 4469 leaq 352(%rsp),%rbx 4470 movq 0+160(%rsp),%r9 4471 movq 8+160(%rsp),%r10 4472 leaq -128+160(%rsp),%rsi 4473 movq 16+160(%rsp),%r11 4474 movq 24+160(%rsp),%r12 4475 leaq 32(%rsp),%rdi 4476 call __ecp_nistz256_mul_montx 4477 4478 movq 96(%rsp),%rdx 4479 leaq 96(%rsp),%rbx 4480 movq 0+64(%rsp),%r9 4481 movq 8+64(%rsp),%r10 4482 leaq -128+64(%rsp),%rsi 4483 movq 16+64(%rsp),%r11 4484 movq 24+64(%rsp),%r12 4485 leaq 64(%rsp),%rdi 4486 call __ecp_nistz256_mul_montx 4487 4488 leaq 32(%rsp),%rbx 4489 leaq 256(%rsp),%rdi 4490 call __ecp_nistz256_sub_fromx 4491 4492.byte 102,72,15,126,199 4493 4494 movdqa %xmm5,%xmm0 4495 movdqa %xmm5,%xmm1 4496 pandn 288(%rsp),%xmm0 4497 movdqa %xmm5,%xmm2 4498 pandn 288+16(%rsp),%xmm1 4499 movdqa %xmm5,%xmm3 4500 pand .LONE_mont(%rip),%xmm2 4501 pand .LONE_mont+16(%rip),%xmm3 4502 por %xmm0,%xmm2 4503 por %xmm1,%xmm3 4504 4505 movdqa %xmm4,%xmm0 4506 movdqa %xmm4,%xmm1 4507 pandn %xmm2,%xmm0 4508 movdqa %xmm4,%xmm2 4509 pandn %xmm3,%xmm1 4510 movdqa %xmm4,%xmm3 4511 pand 384(%rsp),%xmm2 4512 pand 384+16(%rsp),%xmm3 4513 por %xmm0,%xmm2 4514 por %xmm1,%xmm3 4515 movdqu %xmm2,64(%rdi) 4516 movdqu %xmm3,80(%rdi) 4517 4518 movdqa %xmm5,%xmm0 4519 movdqa %xmm5,%xmm1 4520 pandn 224(%rsp),%xmm0 4521 movdqa %xmm5,%xmm2 4522 pandn 224+16(%rsp),%xmm1 4523 movdqa %xmm5,%xmm3 4524 pand 416(%rsp),%xmm2 4525 pand 416+16(%rsp),%xmm3 4526 por %xmm0,%xmm2 4527 por %xmm1,%xmm3 4528 4529 movdqa %xmm4,%xmm0 4530 movdqa %xmm4,%xmm1 4531 pandn %xmm2,%xmm0 4532 movdqa %xmm4,%xmm2 4533 pandn %xmm3,%xmm1 4534 movdqa %xmm4,%xmm3 4535 pand 320(%rsp),%xmm2 4536 pand 320+16(%rsp),%xmm3 4537 por %xmm0,%xmm2 4538 por %xmm1,%xmm3 4539 movdqu %xmm2,0(%rdi) 4540 movdqu %xmm3,16(%rdi) 4541 4542 movdqa %xmm5,%xmm0 4543 movdqa %xmm5,%xmm1 4544 pandn 256(%rsp),%xmm0 4545 movdqa %xmm5,%xmm2 4546 pandn 256+16(%rsp),%xmm1 4547 movdqa %xmm5,%xmm3 4548 pand 448(%rsp),%xmm2 4549 pand 448+16(%rsp),%xmm3 4550 por %xmm0,%xmm2 4551 por %xmm1,%xmm3 4552 4553 movdqa %xmm4,%xmm0 4554 movdqa %xmm4,%xmm1 4555 pandn %xmm2,%xmm0 4556 movdqa %xmm4,%xmm2 4557 pandn %xmm3,%xmm1 4558 movdqa %xmm4,%xmm3 4559 pand 352(%rsp),%xmm2 4560 pand 352+16(%rsp),%xmm3 4561 por %xmm0,%xmm2 4562 por %xmm1,%xmm3 4563 movdqu %xmm2,32(%rdi) 4564 movdqu %xmm3,48(%rdi) 4565 4566 leaq 480+56(%rsp),%rsi 4567.cfi_def_cfa %rsi,8 4568 movq -48(%rsi),%r15 4569.cfi_restore %r15 4570 movq -40(%rsi),%r14 4571.cfi_restore %r14 4572 movq -32(%rsi),%r13 4573.cfi_restore %r13 4574 movq -24(%rsi),%r12 4575.cfi_restore %r12 4576 movq -16(%rsi),%rbx 4577.cfi_restore %rbx 4578 movq -8(%rsi),%rbp 4579.cfi_restore %rbp 4580 leaq (%rsi),%rsp 4581.cfi_def_cfa_register %rsp 4582.Ladd_affinex_epilogue: 4583 .byte 0xf3,0xc3 4584.cfi_endproc 4585.size GFp_nistz256_point_add_affinex,.-GFp_nistz256_point_add_affinex 4586#endif 4587.section .note.GNU-stack,"",@progbits 4588