#if defined(__x86_64__)
/*
 * _bn_mul_mont / bn_mul4x_mont
 *
 * Montgomery multiplication for x86_64 (see the CRYPTOGAMS banner bytes at
 * the end of this file).  AT&T operand order, Mach-O symbol naming.
 *
 * Arguments arrive in the System V AMD64 argument registers.  Their roles,
 * as read from the code in this block:
 *   %rdi  destination word array (written by the final subtract/copy loops)
 *   %rsi  first operand word array          ("a" in the comments below)
 *   %rdx  second operand word array         ("b"; one word per outer pass)
 *   %rcx  word array that is scaled by the per-row factor and subtracted at
 *         the end                           ("n"; presumably the modulus --
 *                                            confirm against the C caller)
 *   %r8   pointer to one word, loaded once  ("n0")
 *   %r9d  word count; only the low 32 bits are used   ("num")
 * Returns 1 in %rax.  %rbx, %rbp and %r12-%r15 are saved on entry and
 * reloaded before returning.
 *
 * Statement order is significant throughout: carries are threaded through
 * CF, and lea/mov/dec are used where CF must survive.
 */
.text

.globl	_bn_mul_mont
.private_extern	_bn_mul_mont

.p2align	4
_bn_mul_mont:
	movl	%r9d,%r9d			/* zero-extend num to 64 bits */
	movq	%rsp,%rax			/* rax = caller stack pointer, kept for the epilogue */

	/* Pick an implementation from num and operand aliasing. */
	testl	$3,%r9d
	jnz	L$mul_enter			/* num not a multiple of 4 -> generic path */
	cmpl	$8,%r9d
	jb	L$mul_enter			/* num < 8 -> generic path */
	cmpq	%rsi,%rdx
	jne	L$mul4x_enter			/* distinct operand pointers -> 4-way path */
	testl	$7,%r9d
	jz	L$sqr8x_enter			/* same pointer and num multiple of 8 -> squaring path */
	jmp	L$mul4x_enter

/* ------------------------------------------------------------------ */
/* Generic path: one word per inner-loop iteration.                     */
/* ------------------------------------------------------------------ */
.p2align	4
L$mul_enter:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	/*
	 * Scratch frame: r10 = (rsp - 16 - 8*num) rounded down to 1024 bytes.
	 * The double negq makes the scaled index negative for the lea only.
	 */
	negq	%r9
	movq	%rsp,%r11
	leaq	-16(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/*
	 * Lower rsp to r10 in steps of at most 4096 bytes, loading one word
	 * from each step so every page of the new frame is touched in order.
	 */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul_page_walk
	jmp	L$mul_page_walk_done

.p2align	4
L$mul_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul_page_walk
L$mul_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)		/* stash caller rsp just above the scratch words */

L$mul_body:
	movq	%rdx,%r12			/* r12 = b (rdx is clobbered by mulq) */
	movq	(%r8),%r8			/* r8 = n0 value */
	movq	(%r12),%rbx			/* rbx = b[0] */
	movq	(%rsi),%rax			/* rax = a[0] */

	xorq	%r14,%r14			/* r14 = outer index */
	xorq	%r15,%r15			/* r15 = inner index */

	/* Row 0: m = low64(a[0]*b[0]) * n0, then accumulate a*b[0] + n*m. */
	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp			/* rbp = m */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10			/* low word is discarded; only the carry matters */
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	L$1st_enter

.p2align	4
L$1st:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r13
	movq	%r10,%r11
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)		/* scratch[j-2] = combined word */
	movq	%rdx,%r13

L$1st_enter:
	mulq	%rbx				/* a[j] * b[0] */
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	1(%r15),%r15			/* lea keeps flags untouched */
	movq	%rdx,%r10

	mulq	%rbp				/* n[j] * m */
	cmpq	%r9,%r15
	jne	L$1st

	/* Row 0 tail: fold the last products and the two carry chains. */
	addq	%rax,%r13
	movq	(%rsi),%rax			/* preload a[0] for the next row */
	adcq	$0,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13
	movq	%r10,%r11

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)		/* scratch[num-1] */
	movq	%rdx,(%rsp,%r9,8)		/* scratch[num] = top carry */

	leaq	1(%r14),%r14
	jmp	L$outer

	/* Rows 1..num-1: same as row 0 but also adding the previous scratch. */
.p2align	4
L$outer:
	movq	(%r12,%r14,8),%rbx		/* rbx = b[i] */
	xorq	%r15,%r15
	movq	%r8,%rbp
	movq	(%rsp),%r10			/* r10 = scratch[0] */
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp			/* rbp = m for this row */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	8(%rsp),%r10			/* r10 = scratch[1] */
	movq	%rdx,%r13

	leaq	1(%r15),%r15
	jmp	L$inner_enter

.p2align	4
L$inner:
	addq	%rax,%r13
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10		/* next scratch word */
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

L$inner_enter:
	mulq	%rbx
	addq	%rax,%r11
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11
	leaq	1(%r15),%r15

	mulq	%rbp
	cmpq	%r9,%r15
	jne	L$inner

	addq	%rax,%r13
	movq	(%rsi),%rax			/* preload a[0] for the next row */
	adcq	$0,%rdx
	addq	%r10,%r13
	movq	(%rsp,%r15,8),%r10		/* previous top carry word */
	adcq	$0,%rdx
	movq	%r13,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdx,%rdx
	addq	%r11,%r13
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r9,8)
	movq	%rdx,(%rsp,%r9,8)

	leaq	1(%r14),%r14
	cmpq	%r9,%r14
	jb	L$outer

	/*
	 * Final reduction: dest = scratch - n with borrow.  The xorq clears CF;
	 * every instruction up to the first sbbq, and the loop bookkeeping
	 * (mov, lea, dec), leaves CF alone.
	 */
	xorq	%r14,%r14
	movq	(%rsp),%rax
	leaq	(%rsp),%rsi			/* rsi = scratch base */
	movq	%r9,%r15
	jmp	L$sub
.p2align	4
L$sub:
	sbbq	(%rcx,%r14,8),%rax
	movq	%rax,(%rdi,%r14,8)
	movq	8(%rsi,%r14,8),%rax
	leaq	1(%r14),%r14
	decq	%r15
	jnz	L$sub

	/*
	 * rax = top carry word minus the final borrow, used as a bit mask:
	 * rsi = (scratch & mask) | (dest & ~mask), i.e. a branch-free choice
	 * between the unreduced scratch and the subtracted value in dest.
	 */
	sbbq	$0,%rax
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	movq	%r9,%r15
	orq	%rcx,%rsi
.p2align	4
L$copy:
	movq	(%rsi,%r14,8),%rax
	movq	%r14,(%rsp,%r14,8)		/* overwrite the scratch word with the index */
	movq	%rax,(%rdi,%r14,8)
	leaq	1(%r14),%r14
	subq	$1,%r15
	jnz	L$copy

	movq	8(%rsp,%r9,8),%rsi		/* rsi = caller rsp saved in the prologue */

	movq	$1,%rax				/* return value */
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$mul_epilogue:
	.byte	0xf3,0xc3			/* rep ret, emitted as raw bytes */

/* ------------------------------------------------------------------ */
/* 4-way path: four words per inner-loop iteration.                     */
/* Entered at L$mul4x_enter from _bn_mul_mont with rax = caller rsp.   */
/* ------------------------------------------------------------------ */
.p2align	4
bn_mul4x_mont:
	movl	%r9d,%r9d
	movq	%rsp,%rax

L$mul4x_enter:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

	/* Scratch frame: (rsp - 32 - 8*num) rounded down to 1024 bytes. */
	negq	%r9
	movq	%rsp,%r11
	leaq	-32(%rsp,%r9,8),%r10
	negq	%r9
	andq	$-1024,%r10

	/* Same page-by-page descent as the generic path. */
	subq	%r10,%r11
	andq	$-4096,%r11
	leaq	(%r10,%r11,1),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul4x_page_walk
	jmp	L$mul4x_page_walk_done

L$mul4x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r11
	cmpq	%r10,%rsp
	ja	L$mul4x_page_walk
L$mul4x_page_walk_done:

	movq	%rax,8(%rsp,%r9,8)		/* caller rsp */

L$mul4x_body:
	movq	%rdi,16(%rsp,%r9,8)		/* spill dest pointer; rdi is an accumulator below */
	movq	%rdx,%r12			/* r12 = b */
	movq	(%r8),%r8			/* r8 = n0 value */
	movq	(%r12),%rbx			/* rbx = b[0] */
	movq	(%rsi),%rax			/* rax = a[0] */

	xorq	%r14,%r14			/* outer index */
	xorq	%r15,%r15			/* inner index */

	/* Row 0 prologue: words 0 and 1. */
	movq	%r8,%rbp
	mulq	%rbx
	movq	%rax,%r10
	movq	(%rcx),%rax

	imulq	%r10,%rbp			/* rbp = m */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	L$1st4x

	/* Row 0 main loop: r10/r11 carry the a*b chain, r13/rdi the n*m chain. */
.p2align	4
L$1st4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	L$1st4x

	/* Row 0 tail: last two words and the top carry. */
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax			/* preload a[0] for the next row */
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)		/* top carry word */

	leaq	1(%r14),%r14

	/* Rows 1..num-1: as row 0, additionally folding in the scratch words. */
.p2align	2
L$outer4x:
	movq	(%r12,%r14,8),%rbx		/* rbx = b[i] */
	xorq	%r15,%r15
	movq	(%rsp),%r10
	movq	%r8,%rbp
	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx),%rax
	adcq	$0,%rdx

	imulq	%r10,%rbp			/* rbp = m for this row */
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r10
	movq	8(%rsi),%rax
	adcq	$0,%rdx
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx),%rax
	adcq	$0,%rdx
	addq	8(%rsp),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	16(%rsi),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	leaq	4(%r15),%r15
	adcq	$0,%rdx
	movq	%rdi,(%rsp)
	movq	%rdx,%r13
	jmp	L$inner4x

.p2align	4
L$inner4x:
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	mulq	%rbx
	addq	%rax,%r10
	movq	(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	4(%r15),%r15
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	-16(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-32(%rsp,%r15,8)
	movq	%rdx,%r13
	cmpq	%r9,%r15
	jb	L$inner4x

	/* Row tail: last two words, then add the previous top carry word. */
	mulq	%rbx
	addq	%rax,%r10
	movq	-16(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-16(%rsp,%r15,8),%r10
	adcq	$0,%rdx
	movq	%rdx,%r11

	mulq	%rbp
	addq	%rax,%r13
	movq	-8(%rsi,%r15,8),%rax
	adcq	$0,%rdx
	addq	%r10,%r13
	adcq	$0,%rdx
	movq	%r13,-24(%rsp,%r15,8)
	movq	%rdx,%rdi

	mulq	%rbx
	addq	%rax,%r11
	movq	-8(%rcx,%r15,8),%rax
	adcq	$0,%rdx
	addq	-8(%rsp,%r15,8),%r11
	adcq	$0,%rdx
	leaq	1(%r14),%r14			/* advance the outer index (flags untouched) */
	movq	%rdx,%r10

	mulq	%rbp
	addq	%rax,%rdi
	movq	(%rsi),%rax			/* preload a[0] for the next row */
	adcq	$0,%rdx
	addq	%r11,%rdi
	adcq	$0,%rdx
	movq	%rdi,-16(%rsp,%r15,8)
	movq	%rdx,%r13

	xorq	%rdi,%rdi
	addq	%r10,%r13
	adcq	$0,%rdi
	addq	(%rsp,%r9,8),%r13
	adcq	$0,%rdi
	movq	%r13,-8(%rsp,%r15,8)
	movq	%rdi,(%rsp,%r15,8)

	cmpq	%r9,%r14
	jb	L$outer4x

	/*
	 * Final reduction, four words per iteration: dest = scratch - n.
	 * r15 = (num - 4) / 4 loop iterations; the last group of four is
	 * finished after the loop.  subq starts the borrow chain; mov, lea
	 * and dec in the loop keep CF intact between the sbbq instructions.
	 */
	movq	16(%rsp,%r9,8),%rdi		/* reload the spilled dest pointer */
	leaq	-4(%r9),%r15
	movq	0(%rsp),%rax
	pxor	%xmm0,%xmm0			/* xmm0 = 0, used to wipe scratch below */
	movq	8(%rsp),%rdx
	shrq	$2,%r15
	leaq	(%rsp),%rsi			/* rsi = scratch base */
	xorq	%r14,%r14

	subq	0(%rcx),%rax
	movq	16(%rsi),%rbx
	movq	24(%rsi),%rbp
	sbbq	8(%rcx),%rdx
	jmp	L$sub4x
.p2align	4
L$sub4x:
	movq	%rax,0(%rdi,%r14,8)
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	16(%rcx,%r14,8),%rbx
	movq	32(%rsi,%r14,8),%rax
	movq	40(%rsi,%r14,8),%rdx
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)
	movq	%rbp,24(%rdi,%r14,8)
	sbbq	32(%rcx,%r14,8),%rax
	movq	48(%rsi,%r14,8),%rbx
	movq	56(%rsi,%r14,8),%rbp
	sbbq	40(%rcx,%r14,8),%rdx
	leaq	4(%r14),%r14
	decq	%r15
	jnz	L$sub4x

	movq	%rax,0(%rdi,%r14,8)
	movq	32(%rsi,%r14,8),%rax		/* top carry word */
	sbbq	16(%rcx,%r14,8),%rbx
	movq	%rdx,8(%rdi,%r14,8)
	sbbq	24(%rcx,%r14,8),%rbp
	movq	%rbx,16(%rdi,%r14,8)

	/*
	 * rax = top carry word minus the final borrow, used as a bit mask to
	 * pick the copy source without branching:
	 * rsi = (scratch & mask) | (dest & ~mask).
	 */
	sbbq	$0,%rax
	movq	%rbp,24(%rdi,%r14,8)
	xorq	%r14,%r14
	andq	%rax,%rsi
	notq	%rax
	movq	%rdi,%rcx
	andq	%rax,%rcx
	leaq	-4(%r9),%r15
	orq	%rcx,%rsi
	shrq	$2,%r15

	/* Copy 16 bytes at a time into dest while zeroing the scratch frame. */
	movdqu	(%rsi),%xmm1
	movdqa	%xmm0,(%rsp)
	movdqu	%xmm1,(%rdi)
	jmp	L$copy4x
.p2align	4
L$copy4x:
	movdqu	16(%rsi,%r14,1),%xmm2
	movdqu	32(%rsi,%r14,1),%xmm1
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movdqa	%xmm0,32(%rsp,%r14,1)
	movdqu	%xmm1,32(%rdi,%r14,1)
	leaq	32(%r14),%r14			/* r14 is a byte offset in this loop */
	decq	%r15
	jnz	L$copy4x

	movdqu	16(%rsi,%r14,1),%xmm2
	movdqa	%xmm0,16(%rsp,%r14,1)
	movdqu	%xmm2,16(%rdi,%r14,1)
	movq	8(%rsp,%r9,8),%rsi		/* rsi = caller rsp saved in the prologue */

	movq	$1,%rax				/* return value */
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$mul4x_epilogue:
	.byte	0xf3,0xc3			/* rep ret, emitted as raw bytes */

/*
 * bn_sqr8x_mont
 *
 * Squaring path.  _bn_mul_mont jumps to L$sqr8x_enter when both operand
 * pointers are equal and the word count is a multiple of 8; at that point
 * %rax already holds the caller stack pointer.  The arithmetic itself is
 * done by _bn_sqr8x_internal, which is defined outside this file; this
 * wrapper builds the stack frame, then performs the final subtraction and
 * masked copy into the destination.
 *
 * Registers on entry, as used below:
 *   %rdi  destination pointer     %rsi  operand pointer
 *   %rcx  pointer subtracted from the result after the call
 *         (presumably the modulus -- confirm against the C caller)
 *   %r8   pointer to one word     %r9d  word count
 * Returns 1 in %rax.
 */
.p2align	5
bn_sqr8x_mont:
	movq	%rsp,%rax			/* rax = caller stack pointer */

L$sqr8x_enter:
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	pushq	%r15

L$sqr8x_prologue:
	movl	%r9d,%r10d
	shll	$3,%r9d				/* r9  = num * 8 (bytes) */
	shlq	$3+2,%r10			/* r10 = num * 32 */
	negq	%r9				/* r9  = -(num * 8) */

	/*
	 * Choose the frame base in rbp: 64 + 2*num*8 bytes below rsp, shifted
	 * by an amount derived from (candidate - rsi) mod 4096.
	 * NOTE(review): this looks like it keeps the frame from sharing
	 * low-order address bits with the input buffer -- confirm against the
	 * generating script.
	 */
	leaq	-64(%rsp,%r9,2),%r11
	movq	%rsp,%rbp
	movq	(%r8),%r8			/* r8 = the word r8 pointed at */
	subq	%rsi,%r11
	andq	$4095,%r11
	cmpq	%r11,%r10
	jb	L$sqr8x_sp_alt
	subq	%r11,%rbp
	leaq	-64(%rbp,%r9,2),%rbp
	jmp	L$sqr8x_sp_done

.p2align	5
L$sqr8x_sp_alt:
	leaq	4096-64(,%r9,2),%r10
	leaq	-64(%rbp,%r9,2),%rbp
	subq	%r10,%r11
	movq	$0,%r10				/* mov, not xor: CF from subq feeds cmovc */
	cmovcq	%r10,%r11
	subq	%r11,%rbp
L$sqr8x_sp_done:
	andq	$-64,%rbp			/* 64-byte align the frame base */

	/*
	 * Lower rsp to rbp in steps of at most 4096 bytes, loading one word
	 * from each step so every page of the new frame is touched in order.
	 */
	movq	%rsp,%r11
	subq	%rbp,%r11
	andq	$-4096,%r11
	leaq	(%r11,%rbp,1),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$sqr8x_page_walk
	jmp	L$sqr8x_page_walk_done

.p2align	4
L$sqr8x_page_walk:
	leaq	-4096(%rsp),%rsp
	movq	(%rsp),%r10
	cmpq	%rbp,%rsp
	ja	L$sqr8x_page_walk
L$sqr8x_page_walk_done:

	movq	%r9,%r10			/* r10 = -(num * 8) */
	negq	%r9				/* r9  =   num * 8  */

	movq	%r8,32(%rsp)			/* frame slot: word loaded from (%r8) */
	movq	%rax,40(%rsp)			/* frame slot: caller stack pointer */

L$sqr8x_body:
	/* The movq gpr<->xmm forms below are emitted as raw bytes. */
.byte	102,72,15,110,209			/* movq %rcx,%xmm2 */
	pxor	%xmm0,%xmm0
.byte	102,72,15,110,207			/* movq %rdi,%xmm1  (park dest pointer) */
.byte	102,73,15,110,218			/* movq %r10,%xmm3 */
	call	_bn_sqr8x_internal

	/*
	 * The code from here on consumes %rdi, %r9, %rbp and %rax as left by
	 * _bn_sqr8x_internal; their exact contract is defined by that routine
	 * and is not visible in this file.
	 */
	leaq	(%rdi,%r9,1),%rbx
	movq	%r9,%rcx
	movq	%r9,%rdx
.byte	102,72,15,126,207			/* movq %xmm1,%rdi  (dest pointer back) */
	sarq	$3+2,%rcx			/* loop counter in units of 32 bytes */
	jmp	L$sqr8x_sub

	/* dest = [rbx] - [rbp], 4 words per pass; lea/mov/inc keep CF alive. */
.p2align	5
L$sqr8x_sub:
	movq	0(%rbx),%r12
	movq	8(%rbx),%r13
	movq	16(%rbx),%r14
	movq	24(%rbx),%r15
	leaq	32(%rbx),%rbx
	sbbq	0(%rbp),%r12
	sbbq	8(%rbp),%r13
	sbbq	16(%rbp),%r14
	sbbq	24(%rbp),%r15
	leaq	32(%rbp),%rbp
	movq	%r12,0(%rdi)
	movq	%r13,8(%rdi)
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi
	incq	%rcx
	jnz	L$sqr8x_sub

	sbbq	$0,%rax				/* fold the final borrow into rax -> select mask */
	leaq	(%rbx,%r9,1),%rbx		/* rewind source pointer */
	leaq	(%rdi,%r9,1),%rdi		/* rewind dest pointer */

.byte	102,72,15,110,200			/* movq %rax,%xmm1 */
	pxor	%xmm0,%xmm0
	pshufd	$0,%xmm1,%xmm1			/* broadcast the low dword of the mask */
	movq	40(%rsp),%rsi			/* rsi = caller stack pointer */

	jmp	L$sqr8x_cond_copy

	/*
	 * Masked copy, 32 bytes per pass:
	 *   dest = ([rbx] & mask) | (dest & ~mask)
	 * while storing zeros over the words just read from [rbx] and over the
	 * matching words at [rbx + rdx].
	 */
.p2align	5
L$sqr8x_cond_copy:
	movdqa	0(%rbx),%xmm2
	movdqa	16(%rbx),%xmm3
	leaq	32(%rbx),%rbx
	movdqu	0(%rdi),%xmm4
	movdqu	16(%rdi),%xmm5
	leaq	32(%rdi),%rdi
	movdqa	%xmm0,-32(%rbx)
	movdqa	%xmm0,-16(%rbx)
	movdqa	%xmm0,-32(%rbx,%rdx,1)
	movdqa	%xmm0,-16(%rbx,%rdx,1)
	pcmpeqd	%xmm1,%xmm0			/* xmm0 = per-dword (mask == 0), the inverse mask */
	pand	%xmm1,%xmm2
	pand	%xmm1,%xmm3
	pand	%xmm0,%xmm4
	pand	%xmm0,%xmm5
	pxor	%xmm0,%xmm0			/* back to zero for the next pass */
	por	%xmm2,%xmm4
	por	%xmm3,%xmm5
	movdqu	%xmm4,-32(%rdi)
	movdqu	%xmm5,-16(%rdi)
	addq	$32,%r9
	jnz	L$sqr8x_cond_copy

	movq	$1,%rax				/* return value */
	movq	-48(%rsi),%r15
	movq	-40(%rsi),%r14
	movq	-32(%rsi),%r13
	movq	-24(%rsi),%r12
	movq	-16(%rsi),%rbp
	movq	-8(%rsi),%rbx
	leaq	(%rsi),%rsp

L$sqr8x_epilogue:
	.byte	0xf3,0xc3			/* rep ret, emitted as raw bytes */

/* ASCII banner: "Montgomery Multiplication for x86_64, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	4
#endif