1# This file is generated from a similarly-named Perl script in the BoringSSL 2# source tree. Do not edit by hand. 3 4#if defined(__has_feature) 5#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM) 6#define OPENSSL_NO_ASM 7#endif 8#endif 9 10#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM) 11#if defined(BORINGSSL_PREFIX) 12#include <boringssl_prefix_symbols_asm.h> 13#endif 14.text 15 16 17 18.globl _bn_mul_mont_gather5 19.private_extern _bn_mul_mont_gather5 20 21.p2align 6 22_bn_mul_mont_gather5: 23 24 movl %r9d,%r9d 25 movq %rsp,%rax 26 27 testl $7,%r9d 28 jnz L$mul_enter 29 leaq _OPENSSL_ia32cap_P(%rip),%r11 30 movl 8(%r11),%r11d 31 jmp L$mul4x_enter 32 33.p2align 4 34L$mul_enter: 35 movd 8(%rsp),%xmm5 36 pushq %rbx 37 38 pushq %rbp 39 40 pushq %r12 41 42 pushq %r13 43 44 pushq %r14 45 46 pushq %r15 47 48 49 negq %r9 50 movq %rsp,%r11 51 leaq -280(%rsp,%r9,8),%r10 52 negq %r9 53 andq $-1024,%r10 54 55 56 57 58 59 60 61 62 63 subq %r10,%r11 64 andq $-4096,%r11 65 leaq (%r10,%r11,1),%rsp 66 movq (%rsp),%r11 67 cmpq %r10,%rsp 68 ja L$mul_page_walk 69 jmp L$mul_page_walk_done 70 71L$mul_page_walk: 72 leaq -4096(%rsp),%rsp 73 movq (%rsp),%r11 74 cmpq %r10,%rsp 75 ja L$mul_page_walk 76L$mul_page_walk_done: 77 78 leaq L$inc(%rip),%r10 79 movq %rax,8(%rsp,%r9,8) 80 81L$mul_body: 82 83 leaq 128(%rdx),%r12 84 movdqa 0(%r10),%xmm0 85 movdqa 16(%r10),%xmm1 86 leaq 24-112(%rsp,%r9,8),%r10 87 andq $-16,%r10 88 89 pshufd $0,%xmm5,%xmm5 90 movdqa %xmm1,%xmm4 91 movdqa %xmm1,%xmm2 92 paddd %xmm0,%xmm1 93 pcmpeqd %xmm5,%xmm0 94.byte 0x67 95 movdqa %xmm4,%xmm3 96 paddd %xmm1,%xmm2 97 pcmpeqd %xmm5,%xmm1 98 movdqa %xmm0,112(%r10) 99 movdqa %xmm4,%xmm0 100 101 paddd %xmm2,%xmm3 102 pcmpeqd %xmm5,%xmm2 103 movdqa %xmm1,128(%r10) 104 movdqa %xmm4,%xmm1 105 106 paddd %xmm3,%xmm0 107 pcmpeqd %xmm5,%xmm3 108 movdqa %xmm2,144(%r10) 109 movdqa %xmm4,%xmm2 110 111 paddd %xmm0,%xmm1 112 pcmpeqd %xmm5,%xmm0 113 movdqa %xmm3,160(%r10) 114 movdqa %xmm4,%xmm3 115 paddd %xmm1,%xmm2 
116 pcmpeqd %xmm5,%xmm1 117 movdqa %xmm0,176(%r10) 118 movdqa %xmm4,%xmm0 119 120 paddd %xmm2,%xmm3 121 pcmpeqd %xmm5,%xmm2 122 movdqa %xmm1,192(%r10) 123 movdqa %xmm4,%xmm1 124 125 paddd %xmm3,%xmm0 126 pcmpeqd %xmm5,%xmm3 127 movdqa %xmm2,208(%r10) 128 movdqa %xmm4,%xmm2 129 130 paddd %xmm0,%xmm1 131 pcmpeqd %xmm5,%xmm0 132 movdqa %xmm3,224(%r10) 133 movdqa %xmm4,%xmm3 134 paddd %xmm1,%xmm2 135 pcmpeqd %xmm5,%xmm1 136 movdqa %xmm0,240(%r10) 137 movdqa %xmm4,%xmm0 138 139 paddd %xmm2,%xmm3 140 pcmpeqd %xmm5,%xmm2 141 movdqa %xmm1,256(%r10) 142 movdqa %xmm4,%xmm1 143 144 paddd %xmm3,%xmm0 145 pcmpeqd %xmm5,%xmm3 146 movdqa %xmm2,272(%r10) 147 movdqa %xmm4,%xmm2 148 149 paddd %xmm0,%xmm1 150 pcmpeqd %xmm5,%xmm0 151 movdqa %xmm3,288(%r10) 152 movdqa %xmm4,%xmm3 153 paddd %xmm1,%xmm2 154 pcmpeqd %xmm5,%xmm1 155 movdqa %xmm0,304(%r10) 156 157 paddd %xmm2,%xmm3 158.byte 0x67 159 pcmpeqd %xmm5,%xmm2 160 movdqa %xmm1,320(%r10) 161 162 pcmpeqd %xmm5,%xmm3 163 movdqa %xmm2,336(%r10) 164 pand 64(%r12),%xmm0 165 166 pand 80(%r12),%xmm1 167 pand 96(%r12),%xmm2 168 movdqa %xmm3,352(%r10) 169 pand 112(%r12),%xmm3 170 por %xmm2,%xmm0 171 por %xmm3,%xmm1 172 movdqa -128(%r12),%xmm4 173 movdqa -112(%r12),%xmm5 174 movdqa -96(%r12),%xmm2 175 pand 112(%r10),%xmm4 176 movdqa -80(%r12),%xmm3 177 pand 128(%r10),%xmm5 178 por %xmm4,%xmm0 179 pand 144(%r10),%xmm2 180 por %xmm5,%xmm1 181 pand 160(%r10),%xmm3 182 por %xmm2,%xmm0 183 por %xmm3,%xmm1 184 movdqa -64(%r12),%xmm4 185 movdqa -48(%r12),%xmm5 186 movdqa -32(%r12),%xmm2 187 pand 176(%r10),%xmm4 188 movdqa -16(%r12),%xmm3 189 pand 192(%r10),%xmm5 190 por %xmm4,%xmm0 191 pand 208(%r10),%xmm2 192 por %xmm5,%xmm1 193 pand 224(%r10),%xmm3 194 por %xmm2,%xmm0 195 por %xmm3,%xmm1 196 movdqa 0(%r12),%xmm4 197 movdqa 16(%r12),%xmm5 198 movdqa 32(%r12),%xmm2 199 pand 240(%r10),%xmm4 200 movdqa 48(%r12),%xmm3 201 pand 256(%r10),%xmm5 202 por %xmm4,%xmm0 203 pand 272(%r10),%xmm2 204 por %xmm5,%xmm1 205 pand 288(%r10),%xmm3 206 por %xmm2,%xmm0 
207 por %xmm3,%xmm1 208 por %xmm1,%xmm0 209 pshufd $0x4e,%xmm0,%xmm1 210 por %xmm1,%xmm0 211 leaq 256(%r12),%r12 212.byte 102,72,15,126,195 213 214 movq (%r8),%r8 215 movq (%rsi),%rax 216 217 xorq %r14,%r14 218 xorq %r15,%r15 219 220 movq %r8,%rbp 221 mulq %rbx 222 movq %rax,%r10 223 movq (%rcx),%rax 224 225 imulq %r10,%rbp 226 movq %rdx,%r11 227 228 mulq %rbp 229 addq %rax,%r10 230 movq 8(%rsi),%rax 231 adcq $0,%rdx 232 movq %rdx,%r13 233 234 leaq 1(%r15),%r15 235 jmp L$1st_enter 236 237.p2align 4 238L$1st: 239 addq %rax,%r13 240 movq (%rsi,%r15,8),%rax 241 adcq $0,%rdx 242 addq %r11,%r13 243 movq %r10,%r11 244 adcq $0,%rdx 245 movq %r13,-16(%rsp,%r15,8) 246 movq %rdx,%r13 247 248L$1st_enter: 249 mulq %rbx 250 addq %rax,%r11 251 movq (%rcx,%r15,8),%rax 252 adcq $0,%rdx 253 leaq 1(%r15),%r15 254 movq %rdx,%r10 255 256 mulq %rbp 257 cmpq %r9,%r15 258 jne L$1st 259 260 261 addq %rax,%r13 262 adcq $0,%rdx 263 addq %r11,%r13 264 adcq $0,%rdx 265 movq %r13,-16(%rsp,%r9,8) 266 movq %rdx,%r13 267 movq %r10,%r11 268 269 xorq %rdx,%rdx 270 addq %r11,%r13 271 adcq $0,%rdx 272 movq %r13,-8(%rsp,%r9,8) 273 movq %rdx,(%rsp,%r9,8) 274 275 leaq 1(%r14),%r14 276 jmp L$outer 277.p2align 4 278L$outer: 279 leaq 24+128(%rsp,%r9,8),%rdx 280 andq $-16,%rdx 281 pxor %xmm4,%xmm4 282 pxor %xmm5,%xmm5 283 movdqa -128(%r12),%xmm0 284 movdqa -112(%r12),%xmm1 285 movdqa -96(%r12),%xmm2 286 movdqa -80(%r12),%xmm3 287 pand -128(%rdx),%xmm0 288 pand -112(%rdx),%xmm1 289 por %xmm0,%xmm4 290 pand -96(%rdx),%xmm2 291 por %xmm1,%xmm5 292 pand -80(%rdx),%xmm3 293 por %xmm2,%xmm4 294 por %xmm3,%xmm5 295 movdqa -64(%r12),%xmm0 296 movdqa -48(%r12),%xmm1 297 movdqa -32(%r12),%xmm2 298 movdqa -16(%r12),%xmm3 299 pand -64(%rdx),%xmm0 300 pand -48(%rdx),%xmm1 301 por %xmm0,%xmm4 302 pand -32(%rdx),%xmm2 303 por %xmm1,%xmm5 304 pand -16(%rdx),%xmm3 305 por %xmm2,%xmm4 306 por %xmm3,%xmm5 307 movdqa 0(%r12),%xmm0 308 movdqa 16(%r12),%xmm1 309 movdqa 32(%r12),%xmm2 310 movdqa 48(%r12),%xmm3 311 pand 
0(%rdx),%xmm0 312 pand 16(%rdx),%xmm1 313 por %xmm0,%xmm4 314 pand 32(%rdx),%xmm2 315 por %xmm1,%xmm5 316 pand 48(%rdx),%xmm3 317 por %xmm2,%xmm4 318 por %xmm3,%xmm5 319 movdqa 64(%r12),%xmm0 320 movdqa 80(%r12),%xmm1 321 movdqa 96(%r12),%xmm2 322 movdqa 112(%r12),%xmm3 323 pand 64(%rdx),%xmm0 324 pand 80(%rdx),%xmm1 325 por %xmm0,%xmm4 326 pand 96(%rdx),%xmm2 327 por %xmm1,%xmm5 328 pand 112(%rdx),%xmm3 329 por %xmm2,%xmm4 330 por %xmm3,%xmm5 331 por %xmm5,%xmm4 332 pshufd $0x4e,%xmm4,%xmm0 333 por %xmm4,%xmm0 334 leaq 256(%r12),%r12 335 336 movq (%rsi),%rax 337.byte 102,72,15,126,195 338 339 xorq %r15,%r15 340 movq %r8,%rbp 341 movq (%rsp),%r10 342 343 mulq %rbx 344 addq %rax,%r10 345 movq (%rcx),%rax 346 adcq $0,%rdx 347 348 imulq %r10,%rbp 349 movq %rdx,%r11 350 351 mulq %rbp 352 addq %rax,%r10 353 movq 8(%rsi),%rax 354 adcq $0,%rdx 355 movq 8(%rsp),%r10 356 movq %rdx,%r13 357 358 leaq 1(%r15),%r15 359 jmp L$inner_enter 360 361.p2align 4 362L$inner: 363 addq %rax,%r13 364 movq (%rsi,%r15,8),%rax 365 adcq $0,%rdx 366 addq %r10,%r13 367 movq (%rsp,%r15,8),%r10 368 adcq $0,%rdx 369 movq %r13,-16(%rsp,%r15,8) 370 movq %rdx,%r13 371 372L$inner_enter: 373 mulq %rbx 374 addq %rax,%r11 375 movq (%rcx,%r15,8),%rax 376 adcq $0,%rdx 377 addq %r11,%r10 378 movq %rdx,%r11 379 adcq $0,%r11 380 leaq 1(%r15),%r15 381 382 mulq %rbp 383 cmpq %r9,%r15 384 jne L$inner 385 386 addq %rax,%r13 387 adcq $0,%rdx 388 addq %r10,%r13 389 movq (%rsp,%r9,8),%r10 390 adcq $0,%rdx 391 movq %r13,-16(%rsp,%r9,8) 392 movq %rdx,%r13 393 394 xorq %rdx,%rdx 395 addq %r11,%r13 396 adcq $0,%rdx 397 addq %r10,%r13 398 adcq $0,%rdx 399 movq %r13,-8(%rsp,%r9,8) 400 movq %rdx,(%rsp,%r9,8) 401 402 leaq 1(%r14),%r14 403 cmpq %r9,%r14 404 jb L$outer 405 406 xorq %r14,%r14 407 movq (%rsp),%rax 408 leaq (%rsp),%rsi 409 movq %r9,%r15 410 jmp L$sub 411.p2align 4 412L$sub: sbbq (%rcx,%r14,8),%rax 413 movq %rax,(%rdi,%r14,8) 414 movq 8(%rsi,%r14,8),%rax 415 leaq 1(%r14),%r14 416 decq %r15 417 jnz L$sub 418 419 
sbbq $0,%rax 420 movq $-1,%rbx 421 xorq %rax,%rbx 422 xorq %r14,%r14 423 movq %r9,%r15 424 425L$copy: 426 movq (%rdi,%r14,8),%rcx 427 movq (%rsp,%r14,8),%rdx 428 andq %rbx,%rcx 429 andq %rax,%rdx 430 movq %r14,(%rsp,%r14,8) 431 orq %rcx,%rdx 432 movq %rdx,(%rdi,%r14,8) 433 leaq 1(%r14),%r14 434 subq $1,%r15 435 jnz L$copy 436 437 movq 8(%rsp,%r9,8),%rsi 438 439 movq $1,%rax 440 441 movq -48(%rsi),%r15 442 443 movq -40(%rsi),%r14 444 445 movq -32(%rsi),%r13 446 447 movq -24(%rsi),%r12 448 449 movq -16(%rsi),%rbp 450 451 movq -8(%rsi),%rbx 452 453 leaq (%rsi),%rsp 454 455L$mul_epilogue: 456 .byte 0xf3,0xc3 457 458 459 460.p2align 5 461bn_mul4x_mont_gather5: 462 463.byte 0x67 464 movq %rsp,%rax 465 466L$mul4x_enter: 467 andl $0x80108,%r11d 468 cmpl $0x80108,%r11d 469 je L$mulx4x_enter 470 pushq %rbx 471 472 pushq %rbp 473 474 pushq %r12 475 476 pushq %r13 477 478 pushq %r14 479 480 pushq %r15 481 482L$mul4x_prologue: 483 484.byte 0x67 485 shll $3,%r9d 486 leaq (%r9,%r9,2),%r10 487 negq %r9 488 489 490 491 492 493 494 495 496 497 498 leaq -320(%rsp,%r9,2),%r11 499 movq %rsp,%rbp 500 subq %rdi,%r11 501 andq $4095,%r11 502 cmpq %r11,%r10 503 jb L$mul4xsp_alt 504 subq %r11,%rbp 505 leaq -320(%rbp,%r9,2),%rbp 506 jmp L$mul4xsp_done 507 508.p2align 5 509L$mul4xsp_alt: 510 leaq 4096-320(,%r9,2),%r10 511 leaq -320(%rbp,%r9,2),%rbp 512 subq %r10,%r11 513 movq $0,%r10 514 cmovcq %r10,%r11 515 subq %r11,%rbp 516L$mul4xsp_done: 517 andq $-64,%rbp 518 movq %rsp,%r11 519 subq %rbp,%r11 520 andq $-4096,%r11 521 leaq (%r11,%rbp,1),%rsp 522 movq (%rsp),%r10 523 cmpq %rbp,%rsp 524 ja L$mul4x_page_walk 525 jmp L$mul4x_page_walk_done 526 527L$mul4x_page_walk: 528 leaq -4096(%rsp),%rsp 529 movq (%rsp),%r10 530 cmpq %rbp,%rsp 531 ja L$mul4x_page_walk 532L$mul4x_page_walk_done: 533 534 negq %r9 535 536 movq %rax,40(%rsp) 537 538L$mul4x_body: 539 540 call mul4x_internal 541 542 movq 40(%rsp),%rsi 543 544 movq $1,%rax 545 546 movq -48(%rsi),%r15 547 548 movq -40(%rsi),%r14 549 550 movq 
-32(%rsi),%r13 551 552 movq -24(%rsi),%r12 553 554 movq -16(%rsi),%rbp 555 556 movq -8(%rsi),%rbx 557 558 leaq (%rsi),%rsp 559 560L$mul4x_epilogue: 561 .byte 0xf3,0xc3 562 563 564 565 566.p2align 5 567mul4x_internal: 568 569 shlq $5,%r9 570 movd 8(%rax),%xmm5 571 leaq L$inc(%rip),%rax 572 leaq 128(%rdx,%r9,1),%r13 573 shrq $5,%r9 574 movdqa 0(%rax),%xmm0 575 movdqa 16(%rax),%xmm1 576 leaq 88-112(%rsp,%r9,1),%r10 577 leaq 128(%rdx),%r12 578 579 pshufd $0,%xmm5,%xmm5 580 movdqa %xmm1,%xmm4 581.byte 0x67,0x67 582 movdqa %xmm1,%xmm2 583 paddd %xmm0,%xmm1 584 pcmpeqd %xmm5,%xmm0 585.byte 0x67 586 movdqa %xmm4,%xmm3 587 paddd %xmm1,%xmm2 588 pcmpeqd %xmm5,%xmm1 589 movdqa %xmm0,112(%r10) 590 movdqa %xmm4,%xmm0 591 592 paddd %xmm2,%xmm3 593 pcmpeqd %xmm5,%xmm2 594 movdqa %xmm1,128(%r10) 595 movdqa %xmm4,%xmm1 596 597 paddd %xmm3,%xmm0 598 pcmpeqd %xmm5,%xmm3 599 movdqa %xmm2,144(%r10) 600 movdqa %xmm4,%xmm2 601 602 paddd %xmm0,%xmm1 603 pcmpeqd %xmm5,%xmm0 604 movdqa %xmm3,160(%r10) 605 movdqa %xmm4,%xmm3 606 paddd %xmm1,%xmm2 607 pcmpeqd %xmm5,%xmm1 608 movdqa %xmm0,176(%r10) 609 movdqa %xmm4,%xmm0 610 611 paddd %xmm2,%xmm3 612 pcmpeqd %xmm5,%xmm2 613 movdqa %xmm1,192(%r10) 614 movdqa %xmm4,%xmm1 615 616 paddd %xmm3,%xmm0 617 pcmpeqd %xmm5,%xmm3 618 movdqa %xmm2,208(%r10) 619 movdqa %xmm4,%xmm2 620 621 paddd %xmm0,%xmm1 622 pcmpeqd %xmm5,%xmm0 623 movdqa %xmm3,224(%r10) 624 movdqa %xmm4,%xmm3 625 paddd %xmm1,%xmm2 626 pcmpeqd %xmm5,%xmm1 627 movdqa %xmm0,240(%r10) 628 movdqa %xmm4,%xmm0 629 630 paddd %xmm2,%xmm3 631 pcmpeqd %xmm5,%xmm2 632 movdqa %xmm1,256(%r10) 633 movdqa %xmm4,%xmm1 634 635 paddd %xmm3,%xmm0 636 pcmpeqd %xmm5,%xmm3 637 movdqa %xmm2,272(%r10) 638 movdqa %xmm4,%xmm2 639 640 paddd %xmm0,%xmm1 641 pcmpeqd %xmm5,%xmm0 642 movdqa %xmm3,288(%r10) 643 movdqa %xmm4,%xmm3 644 paddd %xmm1,%xmm2 645 pcmpeqd %xmm5,%xmm1 646 movdqa %xmm0,304(%r10) 647 648 paddd %xmm2,%xmm3 649.byte 0x67 650 pcmpeqd %xmm5,%xmm2 651 movdqa %xmm1,320(%r10) 652 653 pcmpeqd %xmm5,%xmm3 
654 movdqa %xmm2,336(%r10) 655 pand 64(%r12),%xmm0 656 657 pand 80(%r12),%xmm1 658 pand 96(%r12),%xmm2 659 movdqa %xmm3,352(%r10) 660 pand 112(%r12),%xmm3 661 por %xmm2,%xmm0 662 por %xmm3,%xmm1 663 movdqa -128(%r12),%xmm4 664 movdqa -112(%r12),%xmm5 665 movdqa -96(%r12),%xmm2 666 pand 112(%r10),%xmm4 667 movdqa -80(%r12),%xmm3 668 pand 128(%r10),%xmm5 669 por %xmm4,%xmm0 670 pand 144(%r10),%xmm2 671 por %xmm5,%xmm1 672 pand 160(%r10),%xmm3 673 por %xmm2,%xmm0 674 por %xmm3,%xmm1 675 movdqa -64(%r12),%xmm4 676 movdqa -48(%r12),%xmm5 677 movdqa -32(%r12),%xmm2 678 pand 176(%r10),%xmm4 679 movdqa -16(%r12),%xmm3 680 pand 192(%r10),%xmm5 681 por %xmm4,%xmm0 682 pand 208(%r10),%xmm2 683 por %xmm5,%xmm1 684 pand 224(%r10),%xmm3 685 por %xmm2,%xmm0 686 por %xmm3,%xmm1 687 movdqa 0(%r12),%xmm4 688 movdqa 16(%r12),%xmm5 689 movdqa 32(%r12),%xmm2 690 pand 240(%r10),%xmm4 691 movdqa 48(%r12),%xmm3 692 pand 256(%r10),%xmm5 693 por %xmm4,%xmm0 694 pand 272(%r10),%xmm2 695 por %xmm5,%xmm1 696 pand 288(%r10),%xmm3 697 por %xmm2,%xmm0 698 por %xmm3,%xmm1 699 por %xmm1,%xmm0 700 pshufd $0x4e,%xmm0,%xmm1 701 por %xmm1,%xmm0 702 leaq 256(%r12),%r12 703.byte 102,72,15,126,195 704 705 movq %r13,16+8(%rsp) 706 movq %rdi,56+8(%rsp) 707 708 movq (%r8),%r8 709 movq (%rsi),%rax 710 leaq (%rsi,%r9,1),%rsi 711 negq %r9 712 713 movq %r8,%rbp 714 mulq %rbx 715 movq %rax,%r10 716 movq (%rcx),%rax 717 718 imulq %r10,%rbp 719 leaq 64+8(%rsp),%r14 720 movq %rdx,%r11 721 722 mulq %rbp 723 addq %rax,%r10 724 movq 8(%rsi,%r9,1),%rax 725 adcq $0,%rdx 726 movq %rdx,%rdi 727 728 mulq %rbx 729 addq %rax,%r11 730 movq 8(%rcx),%rax 731 adcq $0,%rdx 732 movq %rdx,%r10 733 734 mulq %rbp 735 addq %rax,%rdi 736 movq 16(%rsi,%r9,1),%rax 737 adcq $0,%rdx 738 addq %r11,%rdi 739 leaq 32(%r9),%r15 740 leaq 32(%rcx),%rcx 741 adcq $0,%rdx 742 movq %rdi,(%r14) 743 movq %rdx,%r13 744 jmp L$1st4x 745 746.p2align 5 747L$1st4x: 748 mulq %rbx 749 addq %rax,%r10 750 movq -16(%rcx),%rax 751 leaq 32(%r14),%r14 752 adcq 
$0,%rdx 753 movq %rdx,%r11 754 755 mulq %rbp 756 addq %rax,%r13 757 movq -8(%rsi,%r15,1),%rax 758 adcq $0,%rdx 759 addq %r10,%r13 760 adcq $0,%rdx 761 movq %r13,-24(%r14) 762 movq %rdx,%rdi 763 764 mulq %rbx 765 addq %rax,%r11 766 movq -8(%rcx),%rax 767 adcq $0,%rdx 768 movq %rdx,%r10 769 770 mulq %rbp 771 addq %rax,%rdi 772 movq (%rsi,%r15,1),%rax 773 adcq $0,%rdx 774 addq %r11,%rdi 775 adcq $0,%rdx 776 movq %rdi,-16(%r14) 777 movq %rdx,%r13 778 779 mulq %rbx 780 addq %rax,%r10 781 movq 0(%rcx),%rax 782 adcq $0,%rdx 783 movq %rdx,%r11 784 785 mulq %rbp 786 addq %rax,%r13 787 movq 8(%rsi,%r15,1),%rax 788 adcq $0,%rdx 789 addq %r10,%r13 790 adcq $0,%rdx 791 movq %r13,-8(%r14) 792 movq %rdx,%rdi 793 794 mulq %rbx 795 addq %rax,%r11 796 movq 8(%rcx),%rax 797 adcq $0,%rdx 798 movq %rdx,%r10 799 800 mulq %rbp 801 addq %rax,%rdi 802 movq 16(%rsi,%r15,1),%rax 803 adcq $0,%rdx 804 addq %r11,%rdi 805 leaq 32(%rcx),%rcx 806 adcq $0,%rdx 807 movq %rdi,(%r14) 808 movq %rdx,%r13 809 810 addq $32,%r15 811 jnz L$1st4x 812 813 mulq %rbx 814 addq %rax,%r10 815 movq -16(%rcx),%rax 816 leaq 32(%r14),%r14 817 adcq $0,%rdx 818 movq %rdx,%r11 819 820 mulq %rbp 821 addq %rax,%r13 822 movq -8(%rsi),%rax 823 adcq $0,%rdx 824 addq %r10,%r13 825 adcq $0,%rdx 826 movq %r13,-24(%r14) 827 movq %rdx,%rdi 828 829 mulq %rbx 830 addq %rax,%r11 831 movq -8(%rcx),%rax 832 adcq $0,%rdx 833 movq %rdx,%r10 834 835 mulq %rbp 836 addq %rax,%rdi 837 movq (%rsi,%r9,1),%rax 838 adcq $0,%rdx 839 addq %r11,%rdi 840 adcq $0,%rdx 841 movq %rdi,-16(%r14) 842 movq %rdx,%r13 843 844 leaq (%rcx,%r9,1),%rcx 845 846 xorq %rdi,%rdi 847 addq %r10,%r13 848 adcq $0,%rdi 849 movq %r13,-8(%r14) 850 851 jmp L$outer4x 852 853.p2align 5 854L$outer4x: 855 leaq 16+128(%r14),%rdx 856 pxor %xmm4,%xmm4 857 pxor %xmm5,%xmm5 858 movdqa -128(%r12),%xmm0 859 movdqa -112(%r12),%xmm1 860 movdqa -96(%r12),%xmm2 861 movdqa -80(%r12),%xmm3 862 pand -128(%rdx),%xmm0 863 pand -112(%rdx),%xmm1 864 por %xmm0,%xmm4 865 pand -96(%rdx),%xmm2 866 
por %xmm1,%xmm5 867 pand -80(%rdx),%xmm3 868 por %xmm2,%xmm4 869 por %xmm3,%xmm5 870 movdqa -64(%r12),%xmm0 871 movdqa -48(%r12),%xmm1 872 movdqa -32(%r12),%xmm2 873 movdqa -16(%r12),%xmm3 874 pand -64(%rdx),%xmm0 875 pand -48(%rdx),%xmm1 876 por %xmm0,%xmm4 877 pand -32(%rdx),%xmm2 878 por %xmm1,%xmm5 879 pand -16(%rdx),%xmm3 880 por %xmm2,%xmm4 881 por %xmm3,%xmm5 882 movdqa 0(%r12),%xmm0 883 movdqa 16(%r12),%xmm1 884 movdqa 32(%r12),%xmm2 885 movdqa 48(%r12),%xmm3 886 pand 0(%rdx),%xmm0 887 pand 16(%rdx),%xmm1 888 por %xmm0,%xmm4 889 pand 32(%rdx),%xmm2 890 por %xmm1,%xmm5 891 pand 48(%rdx),%xmm3 892 por %xmm2,%xmm4 893 por %xmm3,%xmm5 894 movdqa 64(%r12),%xmm0 895 movdqa 80(%r12),%xmm1 896 movdqa 96(%r12),%xmm2 897 movdqa 112(%r12),%xmm3 898 pand 64(%rdx),%xmm0 899 pand 80(%rdx),%xmm1 900 por %xmm0,%xmm4 901 pand 96(%rdx),%xmm2 902 por %xmm1,%xmm5 903 pand 112(%rdx),%xmm3 904 por %xmm2,%xmm4 905 por %xmm3,%xmm5 906 por %xmm5,%xmm4 907 pshufd $0x4e,%xmm4,%xmm0 908 por %xmm4,%xmm0 909 leaq 256(%r12),%r12 910.byte 102,72,15,126,195 911 912 movq (%r14,%r9,1),%r10 913 movq %r8,%rbp 914 mulq %rbx 915 addq %rax,%r10 916 movq (%rcx),%rax 917 adcq $0,%rdx 918 919 imulq %r10,%rbp 920 movq %rdx,%r11 921 movq %rdi,(%r14) 922 923 leaq (%r14,%r9,1),%r14 924 925 mulq %rbp 926 addq %rax,%r10 927 movq 8(%rsi,%r9,1),%rax 928 adcq $0,%rdx 929 movq %rdx,%rdi 930 931 mulq %rbx 932 addq %rax,%r11 933 movq 8(%rcx),%rax 934 adcq $0,%rdx 935 addq 8(%r14),%r11 936 adcq $0,%rdx 937 movq %rdx,%r10 938 939 mulq %rbp 940 addq %rax,%rdi 941 movq 16(%rsi,%r9,1),%rax 942 adcq $0,%rdx 943 addq %r11,%rdi 944 leaq 32(%r9),%r15 945 leaq 32(%rcx),%rcx 946 adcq $0,%rdx 947 movq %rdx,%r13 948 jmp L$inner4x 949 950.p2align 5 951L$inner4x: 952 mulq %rbx 953 addq %rax,%r10 954 movq -16(%rcx),%rax 955 adcq $0,%rdx 956 addq 16(%r14),%r10 957 leaq 32(%r14),%r14 958 adcq $0,%rdx 959 movq %rdx,%r11 960 961 mulq %rbp 962 addq %rax,%r13 963 movq -8(%rsi,%r15,1),%rax 964 adcq $0,%rdx 965 addq %r10,%r13 966 adcq 
$0,%rdx 967 movq %rdi,-32(%r14) 968 movq %rdx,%rdi 969 970 mulq %rbx 971 addq %rax,%r11 972 movq -8(%rcx),%rax 973 adcq $0,%rdx 974 addq -8(%r14),%r11 975 adcq $0,%rdx 976 movq %rdx,%r10 977 978 mulq %rbp 979 addq %rax,%rdi 980 movq (%rsi,%r15,1),%rax 981 adcq $0,%rdx 982 addq %r11,%rdi 983 adcq $0,%rdx 984 movq %r13,-24(%r14) 985 movq %rdx,%r13 986 987 mulq %rbx 988 addq %rax,%r10 989 movq 0(%rcx),%rax 990 adcq $0,%rdx 991 addq (%r14),%r10 992 adcq $0,%rdx 993 movq %rdx,%r11 994 995 mulq %rbp 996 addq %rax,%r13 997 movq 8(%rsi,%r15,1),%rax 998 adcq $0,%rdx 999 addq %r10,%r13 1000 adcq $0,%rdx 1001 movq %rdi,-16(%r14) 1002 movq %rdx,%rdi 1003 1004 mulq %rbx 1005 addq %rax,%r11 1006 movq 8(%rcx),%rax 1007 adcq $0,%rdx 1008 addq 8(%r14),%r11 1009 adcq $0,%rdx 1010 movq %rdx,%r10 1011 1012 mulq %rbp 1013 addq %rax,%rdi 1014 movq 16(%rsi,%r15,1),%rax 1015 adcq $0,%rdx 1016 addq %r11,%rdi 1017 leaq 32(%rcx),%rcx 1018 adcq $0,%rdx 1019 movq %r13,-8(%r14) 1020 movq %rdx,%r13 1021 1022 addq $32,%r15 1023 jnz L$inner4x 1024 1025 mulq %rbx 1026 addq %rax,%r10 1027 movq -16(%rcx),%rax 1028 adcq $0,%rdx 1029 addq 16(%r14),%r10 1030 leaq 32(%r14),%r14 1031 adcq $0,%rdx 1032 movq %rdx,%r11 1033 1034 mulq %rbp 1035 addq %rax,%r13 1036 movq -8(%rsi),%rax 1037 adcq $0,%rdx 1038 addq %r10,%r13 1039 adcq $0,%rdx 1040 movq %rdi,-32(%r14) 1041 movq %rdx,%rdi 1042 1043 mulq %rbx 1044 addq %rax,%r11 1045 movq %rbp,%rax 1046 movq -8(%rcx),%rbp 1047 adcq $0,%rdx 1048 addq -8(%r14),%r11 1049 adcq $0,%rdx 1050 movq %rdx,%r10 1051 1052 mulq %rbp 1053 addq %rax,%rdi 1054 movq (%rsi,%r9,1),%rax 1055 adcq $0,%rdx 1056 addq %r11,%rdi 1057 adcq $0,%rdx 1058 movq %r13,-24(%r14) 1059 movq %rdx,%r13 1060 1061 movq %rdi,-16(%r14) 1062 leaq (%rcx,%r9,1),%rcx 1063 1064 xorq %rdi,%rdi 1065 addq %r10,%r13 1066 adcq $0,%rdi 1067 addq (%r14),%r13 1068 adcq $0,%rdi 1069 movq %r13,-8(%r14) 1070 1071 cmpq 16+8(%rsp),%r12 1072 jb L$outer4x 1073 xorq %rax,%rax 1074 subq %r13,%rbp 1075 adcq %r15,%r15 1076 orq 
%r15,%rdi 1077 subq %rdi,%rax 1078 leaq (%r14,%r9,1),%rbx 1079 movq (%rcx),%r12 1080 leaq (%rcx),%rbp 1081 movq %r9,%rcx 1082 sarq $3+2,%rcx 1083 movq 56+8(%rsp),%rdi 1084 decq %r12 1085 xorq %r10,%r10 1086 movq 8(%rbp),%r13 1087 movq 16(%rbp),%r14 1088 movq 24(%rbp),%r15 1089 jmp L$sqr4x_sub_entry 1090 1091 1092.globl _bn_power5 1093.private_extern _bn_power5 1094 1095.p2align 5 1096_bn_power5: 1097 1098 movq %rsp,%rax 1099 1100 leaq _OPENSSL_ia32cap_P(%rip),%r11 1101 movl 8(%r11),%r11d 1102 andl $0x80108,%r11d 1103 cmpl $0x80108,%r11d 1104 je L$powerx5_enter 1105 pushq %rbx 1106 1107 pushq %rbp 1108 1109 pushq %r12 1110 1111 pushq %r13 1112 1113 pushq %r14 1114 1115 pushq %r15 1116 1117L$power5_prologue: 1118 1119 shll $3,%r9d 1120 leal (%r9,%r9,2),%r10d 1121 negq %r9 1122 movq (%r8),%r8 1123 1124 1125 1126 1127 1128 1129 1130 1131 leaq -320(%rsp,%r9,2),%r11 1132 movq %rsp,%rbp 1133 subq %rdi,%r11 1134 andq $4095,%r11 1135 cmpq %r11,%r10 1136 jb L$pwr_sp_alt 1137 subq %r11,%rbp 1138 leaq -320(%rbp,%r9,2),%rbp 1139 jmp L$pwr_sp_done 1140 1141.p2align 5 1142L$pwr_sp_alt: 1143 leaq 4096-320(,%r9,2),%r10 1144 leaq -320(%rbp,%r9,2),%rbp 1145 subq %r10,%r11 1146 movq $0,%r10 1147 cmovcq %r10,%r11 1148 subq %r11,%rbp 1149L$pwr_sp_done: 1150 andq $-64,%rbp 1151 movq %rsp,%r11 1152 subq %rbp,%r11 1153 andq $-4096,%r11 1154 leaq (%r11,%rbp,1),%rsp 1155 movq (%rsp),%r10 1156 cmpq %rbp,%rsp 1157 ja L$pwr_page_walk 1158 jmp L$pwr_page_walk_done 1159 1160L$pwr_page_walk: 1161 leaq -4096(%rsp),%rsp 1162 movq (%rsp),%r10 1163 cmpq %rbp,%rsp 1164 ja L$pwr_page_walk 1165L$pwr_page_walk_done: 1166 1167 movq %r9,%r10 1168 negq %r9 1169 1170 1171 1172 1173 1174 1175 1176 1177 1178 1179 movq %r8,32(%rsp) 1180 movq %rax,40(%rsp) 1181 1182L$power5_body: 1183.byte 102,72,15,110,207 1184.byte 102,72,15,110,209 1185.byte 102,73,15,110,218 1186.byte 102,72,15,110,226 1187 1188 call __bn_sqr8x_internal 1189 call __bn_post4x_internal 1190 call __bn_sqr8x_internal 1191 call 
__bn_post4x_internal 1192 call __bn_sqr8x_internal 1193 call __bn_post4x_internal 1194 call __bn_sqr8x_internal 1195 call __bn_post4x_internal 1196 call __bn_sqr8x_internal 1197 call __bn_post4x_internal 1198 1199.byte 102,72,15,126,209 1200.byte 102,72,15,126,226 1201 movq %rsi,%rdi 1202 movq 40(%rsp),%rax 1203 leaq 32(%rsp),%r8 1204 1205 call mul4x_internal 1206 1207 movq 40(%rsp),%rsi 1208 1209 movq $1,%rax 1210 movq -48(%rsi),%r15 1211 1212 movq -40(%rsi),%r14 1213 1214 movq -32(%rsi),%r13 1215 1216 movq -24(%rsi),%r12 1217 1218 movq -16(%rsi),%rbp 1219 1220 movq -8(%rsi),%rbx 1221 1222 leaq (%rsi),%rsp 1223 1224L$power5_epilogue: 1225 .byte 0xf3,0xc3 1226 1227 1228 1229.globl _bn_sqr8x_internal 1230.private_extern _bn_sqr8x_internal 1231.private_extern _bn_sqr8x_internal 1232 1233.p2align 5 1234_bn_sqr8x_internal: 1235__bn_sqr8x_internal: 1236 1237 1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293 1294 1295 1296 1297 1298 1299 1300 1301 1302 1303 1304 1305 1306 1307 1308 1309 1310 leaq 32(%r10),%rbp 1311 leaq (%rsi,%r9,1),%rsi 1312 1313 movq %r9,%rcx 1314 1315 1316 movq -32(%rsi,%rbp,1),%r14 1317 leaq 48+8(%rsp,%r9,2),%rdi 1318 movq -24(%rsi,%rbp,1),%rax 1319 leaq -32(%rdi,%rbp,1),%rdi 1320 movq -16(%rsi,%rbp,1),%rbx 1321 movq %rax,%r15 1322 1323 mulq %r14 1324 movq %rax,%r10 1325 movq %rbx,%rax 1326 movq %rdx,%r11 1327 movq %r10,-24(%rdi,%rbp,1) 1328 1329 mulq %r14 1330 addq %rax,%r11 1331 movq %rbx,%rax 1332 adcq $0,%rdx 1333 movq %r11,-16(%rdi,%rbp,1) 1334 movq %rdx,%r10 1335 1336 1337 movq -8(%rsi,%rbp,1),%rbx 1338 mulq %r15 1339 movq %rax,%r12 1340 movq %rbx,%rax 1341 movq %rdx,%r13 1342 1343 leaq (%rbp),%rcx 1344 mulq %r14 1345 addq %rax,%r10 1346 movq %rbx,%rax 1347 movq %rdx,%r11 1348 adcq $0,%r11 1349 addq %r12,%r10 1350 adcq 
$0,%r11 1351 movq %r10,-8(%rdi,%rcx,1) 1352 jmp L$sqr4x_1st 1353 1354.p2align 5 1355L$sqr4x_1st: 1356 movq (%rsi,%rcx,1),%rbx 1357 mulq %r15 1358 addq %rax,%r13 1359 movq %rbx,%rax 1360 movq %rdx,%r12 1361 adcq $0,%r12 1362 1363 mulq %r14 1364 addq %rax,%r11 1365 movq %rbx,%rax 1366 movq 8(%rsi,%rcx,1),%rbx 1367 movq %rdx,%r10 1368 adcq $0,%r10 1369 addq %r13,%r11 1370 adcq $0,%r10 1371 1372 1373 mulq %r15 1374 addq %rax,%r12 1375 movq %rbx,%rax 1376 movq %r11,(%rdi,%rcx,1) 1377 movq %rdx,%r13 1378 adcq $0,%r13 1379 1380 mulq %r14 1381 addq %rax,%r10 1382 movq %rbx,%rax 1383 movq 16(%rsi,%rcx,1),%rbx 1384 movq %rdx,%r11 1385 adcq $0,%r11 1386 addq %r12,%r10 1387 adcq $0,%r11 1388 1389 mulq %r15 1390 addq %rax,%r13 1391 movq %rbx,%rax 1392 movq %r10,8(%rdi,%rcx,1) 1393 movq %rdx,%r12 1394 adcq $0,%r12 1395 1396 mulq %r14 1397 addq %rax,%r11 1398 movq %rbx,%rax 1399 movq 24(%rsi,%rcx,1),%rbx 1400 movq %rdx,%r10 1401 adcq $0,%r10 1402 addq %r13,%r11 1403 adcq $0,%r10 1404 1405 1406 mulq %r15 1407 addq %rax,%r12 1408 movq %rbx,%rax 1409 movq %r11,16(%rdi,%rcx,1) 1410 movq %rdx,%r13 1411 adcq $0,%r13 1412 leaq 32(%rcx),%rcx 1413 1414 mulq %r14 1415 addq %rax,%r10 1416 movq %rbx,%rax 1417 movq %rdx,%r11 1418 adcq $0,%r11 1419 addq %r12,%r10 1420 adcq $0,%r11 1421 movq %r10,-8(%rdi,%rcx,1) 1422 1423 cmpq $0,%rcx 1424 jne L$sqr4x_1st 1425 1426 mulq %r15 1427 addq %rax,%r13 1428 leaq 16(%rbp),%rbp 1429 adcq $0,%rdx 1430 addq %r11,%r13 1431 adcq $0,%rdx 1432 1433 movq %r13,(%rdi) 1434 movq %rdx,%r12 1435 movq %rdx,8(%rdi) 1436 jmp L$sqr4x_outer 1437 1438.p2align 5 1439L$sqr4x_outer: 1440 movq -32(%rsi,%rbp,1),%r14 1441 leaq 48+8(%rsp,%r9,2),%rdi 1442 movq -24(%rsi,%rbp,1),%rax 1443 leaq -32(%rdi,%rbp,1),%rdi 1444 movq -16(%rsi,%rbp,1),%rbx 1445 movq %rax,%r15 1446 1447 mulq %r14 1448 movq -24(%rdi,%rbp,1),%r10 1449 addq %rax,%r10 1450 movq %rbx,%rax 1451 adcq $0,%rdx 1452 movq %r10,-24(%rdi,%rbp,1) 1453 movq %rdx,%r11 1454 1455 mulq %r14 1456 addq %rax,%r11 1457 movq 
%rbx,%rax 1458 adcq $0,%rdx 1459 addq -16(%rdi,%rbp,1),%r11 1460 movq %rdx,%r10 1461 adcq $0,%r10 1462 movq %r11,-16(%rdi,%rbp,1) 1463 1464 xorq %r12,%r12 1465 1466 movq -8(%rsi,%rbp,1),%rbx 1467 mulq %r15 1468 addq %rax,%r12 1469 movq %rbx,%rax 1470 adcq $0,%rdx 1471 addq -8(%rdi,%rbp,1),%r12 1472 movq %rdx,%r13 1473 adcq $0,%r13 1474 1475 mulq %r14 1476 addq %rax,%r10 1477 movq %rbx,%rax 1478 adcq $0,%rdx 1479 addq %r12,%r10 1480 movq %rdx,%r11 1481 adcq $0,%r11 1482 movq %r10,-8(%rdi,%rbp,1) 1483 1484 leaq (%rbp),%rcx 1485 jmp L$sqr4x_inner 1486 1487.p2align 5 1488L$sqr4x_inner: 1489 movq (%rsi,%rcx,1),%rbx 1490 mulq %r15 1491 addq %rax,%r13 1492 movq %rbx,%rax 1493 movq %rdx,%r12 1494 adcq $0,%r12 1495 addq (%rdi,%rcx,1),%r13 1496 adcq $0,%r12 1497 1498.byte 0x67 1499 mulq %r14 1500 addq %rax,%r11 1501 movq %rbx,%rax 1502 movq 8(%rsi,%rcx,1),%rbx 1503 movq %rdx,%r10 1504 adcq $0,%r10 1505 addq %r13,%r11 1506 adcq $0,%r10 1507 1508 mulq %r15 1509 addq %rax,%r12 1510 movq %r11,(%rdi,%rcx,1) 1511 movq %rbx,%rax 1512 movq %rdx,%r13 1513 adcq $0,%r13 1514 addq 8(%rdi,%rcx,1),%r12 1515 leaq 16(%rcx),%rcx 1516 adcq $0,%r13 1517 1518 mulq %r14 1519 addq %rax,%r10 1520 movq %rbx,%rax 1521 adcq $0,%rdx 1522 addq %r12,%r10 1523 movq %rdx,%r11 1524 adcq $0,%r11 1525 movq %r10,-8(%rdi,%rcx,1) 1526 1527 cmpq $0,%rcx 1528 jne L$sqr4x_inner 1529 1530.byte 0x67 1531 mulq %r15 1532 addq %rax,%r13 1533 adcq $0,%rdx 1534 addq %r11,%r13 1535 adcq $0,%rdx 1536 1537 movq %r13,(%rdi) 1538 movq %rdx,%r12 1539 movq %rdx,8(%rdi) 1540 1541 addq $16,%rbp 1542 jnz L$sqr4x_outer 1543 1544 1545 movq -32(%rsi),%r14 1546 leaq 48+8(%rsp,%r9,2),%rdi 1547 movq -24(%rsi),%rax 1548 leaq -32(%rdi,%rbp,1),%rdi 1549 movq -16(%rsi),%rbx 1550 movq %rax,%r15 1551 1552 mulq %r14 1553 addq %rax,%r10 1554 movq %rbx,%rax 1555 movq %rdx,%r11 1556 adcq $0,%r11 1557 1558 mulq %r14 1559 addq %rax,%r11 1560 movq %rbx,%rax 1561 movq %r10,-24(%rdi) 1562 movq %rdx,%r10 1563 adcq $0,%r10 1564 addq %r13,%r11 1565 movq 
-8(%rsi),%rbx 1566 adcq $0,%r10 1567 1568 mulq %r15 1569 addq %rax,%r12 1570 movq %rbx,%rax 1571 movq %r11,-16(%rdi) 1572 movq %rdx,%r13 1573 adcq $0,%r13 1574 1575 mulq %r14 1576 addq %rax,%r10 1577 movq %rbx,%rax 1578 movq %rdx,%r11 1579 adcq $0,%r11 1580 addq %r12,%r10 1581 adcq $0,%r11 1582 movq %r10,-8(%rdi) 1583 1584 mulq %r15 1585 addq %rax,%r13 1586 movq -16(%rsi),%rax 1587 adcq $0,%rdx 1588 addq %r11,%r13 1589 adcq $0,%rdx 1590 1591 movq %r13,(%rdi) 1592 movq %rdx,%r12 1593 movq %rdx,8(%rdi) 1594 1595 mulq %rbx 1596 addq $16,%rbp 1597 xorq %r14,%r14 1598 subq %r9,%rbp 1599 xorq %r15,%r15 1600 1601 addq %r12,%rax 1602 adcq $0,%rdx 1603 movq %rax,8(%rdi) 1604 movq %rdx,16(%rdi) 1605 movq %r15,24(%rdi) 1606 1607 movq -16(%rsi,%rbp,1),%rax 1608 leaq 48+8(%rsp),%rdi 1609 xorq %r10,%r10 1610 movq 8(%rdi),%r11 1611 1612 leaq (%r14,%r10,2),%r12 1613 shrq $63,%r10 1614 leaq (%rcx,%r11,2),%r13 1615 shrq $63,%r11 1616 orq %r10,%r13 1617 movq 16(%rdi),%r10 1618 movq %r11,%r14 1619 mulq %rax 1620 negq %r15 1621 movq 24(%rdi),%r11 1622 adcq %rax,%r12 1623 movq -8(%rsi,%rbp,1),%rax 1624 movq %r12,(%rdi) 1625 adcq %rdx,%r13 1626 1627 leaq (%r14,%r10,2),%rbx 1628 movq %r13,8(%rdi) 1629 sbbq %r15,%r15 1630 shrq $63,%r10 1631 leaq (%rcx,%r11,2),%r8 1632 shrq $63,%r11 1633 orq %r10,%r8 1634 movq 32(%rdi),%r10 1635 movq %r11,%r14 1636 mulq %rax 1637 negq %r15 1638 movq 40(%rdi),%r11 1639 adcq %rax,%rbx 1640 movq 0(%rsi,%rbp,1),%rax 1641 movq %rbx,16(%rdi) 1642 adcq %rdx,%r8 1643 leaq 16(%rbp),%rbp 1644 movq %r8,24(%rdi) 1645 sbbq %r15,%r15 1646 leaq 64(%rdi),%rdi 1647 jmp L$sqr4x_shift_n_add 1648 1649.p2align 5 1650L$sqr4x_shift_n_add: 1651 leaq (%r14,%r10,2),%r12 1652 shrq $63,%r10 1653 leaq (%rcx,%r11,2),%r13 1654 shrq $63,%r11 1655 orq %r10,%r13 1656 movq -16(%rdi),%r10 1657 movq %r11,%r14 1658 mulq %rax 1659 negq %r15 1660 movq -8(%rdi),%r11 1661 adcq %rax,%r12 1662 movq -8(%rsi,%rbp,1),%rax 1663 movq %r12,-32(%rdi) 1664 adcq %rdx,%r13 1665 1666 leaq (%r14,%r10,2),%rbx 
1667 movq %r13,-24(%rdi) 1668 sbbq %r15,%r15 1669 shrq $63,%r10 1670 leaq (%rcx,%r11,2),%r8 1671 shrq $63,%r11 1672 orq %r10,%r8 1673 movq 0(%rdi),%r10 1674 movq %r11,%r14 1675 mulq %rax 1676 negq %r15 1677 movq 8(%rdi),%r11 1678 adcq %rax,%rbx 1679 movq 0(%rsi,%rbp,1),%rax 1680 movq %rbx,-16(%rdi) 1681 adcq %rdx,%r8 1682 1683 leaq (%r14,%r10,2),%r12 1684 movq %r8,-8(%rdi) 1685 sbbq %r15,%r15 1686 shrq $63,%r10 1687 leaq (%rcx,%r11,2),%r13 1688 shrq $63,%r11 1689 orq %r10,%r13 1690 movq 16(%rdi),%r10 1691 movq %r11,%r14 1692 mulq %rax 1693 negq %r15 1694 movq 24(%rdi),%r11 1695 adcq %rax,%r12 1696 movq 8(%rsi,%rbp,1),%rax 1697 movq %r12,0(%rdi) 1698 adcq %rdx,%r13 1699 1700 leaq (%r14,%r10,2),%rbx 1701 movq %r13,8(%rdi) 1702 sbbq %r15,%r15 1703 shrq $63,%r10 1704 leaq (%rcx,%r11,2),%r8 1705 shrq $63,%r11 1706 orq %r10,%r8 1707 movq 32(%rdi),%r10 1708 movq %r11,%r14 1709 mulq %rax 1710 negq %r15 1711 movq 40(%rdi),%r11 1712 adcq %rax,%rbx 1713 movq 16(%rsi,%rbp,1),%rax 1714 movq %rbx,16(%rdi) 1715 adcq %rdx,%r8 1716 movq %r8,24(%rdi) 1717 sbbq %r15,%r15 1718 leaq 64(%rdi),%rdi 1719 addq $32,%rbp 1720 jnz L$sqr4x_shift_n_add 1721 1722 leaq (%r14,%r10,2),%r12 1723.byte 0x67 1724 shrq $63,%r10 1725 leaq (%rcx,%r11,2),%r13 1726 shrq $63,%r11 1727 orq %r10,%r13 1728 movq -16(%rdi),%r10 1729 movq %r11,%r14 1730 mulq %rax 1731 negq %r15 1732 movq -8(%rdi),%r11 1733 adcq %rax,%r12 1734 movq -8(%rsi),%rax 1735 movq %r12,-32(%rdi) 1736 adcq %rdx,%r13 1737 1738 leaq (%r14,%r10,2),%rbx 1739 movq %r13,-24(%rdi) 1740 sbbq %r15,%r15 1741 shrq $63,%r10 1742 leaq (%rcx,%r11,2),%r8 1743 shrq $63,%r11 1744 orq %r10,%r8 1745 mulq %rax 1746 negq %r15 1747 adcq %rax,%rbx 1748 adcq %rdx,%r8 1749 movq %rbx,-16(%rdi) 1750 movq %r8,-8(%rdi) 1751.byte 102,72,15,126,213 1752__bn_sqr8x_reduction: 1753 xorq %rax,%rax 1754 leaq (%r9,%rbp,1),%rcx 1755 leaq 48+8(%rsp,%r9,2),%rdx 1756 movq %rcx,0+8(%rsp) 1757 leaq 48+8(%rsp,%r9,1),%rdi 1758 movq %rdx,8+8(%rsp) 1759 negq %r9 1760 jmp 
L$8x_reduction_loop

# ---------------------------------------------------------------------
# Montgomery reduction core of the (non-MULX) sqr8x path.
# Each pass of L$8x_reduction_loop folds one 8-word window of the
# temporary result t[] (at %rdi) against the modulus n[] (at %rbp)
# using classic mulq arithmetic with an explicit carry chain.
# NOTE(review): 32+8(%rsp) appears to hold n0 = -n^{-1} mod 2^64 and
# 0+8(%rsp)/8+8(%rsp) the end pointers saved by the caller; the
# prologue that stores them is outside this chunk - confirm there.
# The stray .byte 0x66/0x67 prefixes are padding emitted by the
# perlasm generator to shape instruction lengths; they do not change
# semantics here.
# ---------------------------------------------------------------------
.p2align	5
L$8x_reduction_loop:
	leaq	(%rdi,%r9,1),%rdi	# rewind t by num words (%r9 is negative)
.byte	0x66
	movq	0(%rdi),%rbx		# load the 8-word window t[0..7]
	movq	8(%rdi),%r9
	movq	16(%rdi),%r10
	movq	24(%rdi),%r11
	movq	32(%rdi),%r12
	movq	40(%rdi),%r13
	movq	48(%rdi),%r14
	movq	56(%rdi),%r15
	movq	%rax,(%rdx)		# park carry word from the previous pass
	leaq	64(%rdi),%rdi

.byte	0x67
	movq	%rbx,%r8
	imulq	32+8(%rsp),%rbx		# m = t[0] * n0 mod 2^64
	movq	0(%rbp),%rax
	movl	$8,%ecx			# eight inner iterations per window
	jmp	L$8x_reduce

# Inner loop: multiply the current multiplier m (%rbx) by all eight
# modulus words, fold into t, and derive the next multiplier from the
# new low word (computed into %rsi via imulq %r8,%rsi mid-loop).
.p2align	5
L$8x_reduce:
	mulq	%rbx			# n[0]*m; low half cancels the low t word
	movq	8(%rbp),%rax
	negq	%r8			# CF = (old low word != 0)
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx			# n[1]*m
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	movq	%rbx,48-8+8(%rsp,%rcx,8)	# save m: L$8x_tail replays these
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx			# n[2]*m
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	32+8(%rsp),%rsi		# reload n0 for the next multiplier
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx			# n[3]*m
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	imulq	%r8,%rsi		# next m = new low word * n0
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx			# n[4]*m
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx			# n[5]*m
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx			# n[6]*m
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx			# n[7]*m
	movq	%rsi,%rbx		# switch to the freshly derived multiplier
	addq	%rax,%r15
	movq	0(%rbp),%rax
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	L$8x_reduce

	leaq	64(%rbp),%rbp		# advance modulus window
	xorq	%rax,%rax
	movq	8+8(%rsp),%rdx		# end-of-t marker
	cmpq	0+8(%rsp),%rbp
	jae	L$8x_no_tail		# modulus exhausted: no tail chunks left

.byte	0x66
	addq	0(%rdi),%r8		# accumulate the next chunk of t[]
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi		# %rsi = -carry, re-injected via negq below

	movq	48+56+8(%rsp),%rbx	# first saved multiplier m
	movl	$8,%ecx
	movq	0(%rbp),%rax
	jmp	L$8x_tail

# Tail loop: replay the eight multipliers saved by L$8x_reduce against
# the remaining modulus chunks (%rbx reloaded from the save area each
# iteration via 48-16+8(%rsp,%rcx,8)).
.p2align	5
L$8x_tail:
	mulq	%rbx
	addq	%rax,%r8
	movq	8(%rbp),%rax
	movq	%r8,(%rdi)		# store finished word, slide window by 8
	movq	%rdx,%r8
	adcq	$0,%r8

	mulq	%rbx
	addq	%rax,%r9
	movq	16(%rbp),%rax
	adcq	$0,%rdx
	addq	%r9,%r8
	leaq	8(%rdi),%rdi
	movq	%rdx,%r9
	adcq	$0,%r9

	mulq	%rbx
	addq	%rax,%r10
	movq	24(%rbp),%rax
	adcq	$0,%rdx
	addq	%r10,%r9
	movq	%rdx,%r10
	adcq	$0,%r10

	mulq	%rbx
	addq	%rax,%r11
	movq	32(%rbp),%rax
	adcq	$0,%rdx
	addq	%r11,%r10
	movq	%rdx,%r11
	adcq	$0,%r11

	mulq	%rbx
	addq	%rax,%r12
	movq	40(%rbp),%rax
	adcq	$0,%rdx
	addq	%r12,%r11
	movq	%rdx,%r12
	adcq	$0,%r12

	mulq	%rbx
	addq	%rax,%r13
	movq	48(%rbp),%rax
	adcq	$0,%rdx
	addq	%r13,%r12
	movq	%rdx,%r13
	adcq	$0,%r13

	mulq	%rbx
	addq	%rax,%r14
	movq	56(%rbp),%rax
	adcq	$0,%rdx
	addq	%r14,%r13
	movq	%rdx,%r14
	adcq	$0,%r14

	mulq	%rbx
	movq	48-16+8(%rsp,%rcx,8),%rbx	# next saved multiplier
	addq	%rax,%r15
	adcq	$0,%rdx
	addq	%r15,%r14
	movq	0(%rbp),%rax
	movq	%rdx,%r15
	adcq	$0,%r15

	decl	%ecx
	jnz	L$8x_tail

	leaq	64(%rbp),%rbp
	movq	8+8(%rsp),%rdx
	cmpq	0+8(%rsp),%rbp
	jae	L$8x_tail_done		# last modulus chunk processed

	movq	48+56+8(%rsp),%rbx
	negq	%rsi			# restore carry saved by sbbq above
	movq	0(%rbp),%rax
	adcq	0(%rdi),%r8		# fold in next chunk of t[], with carry
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	sbbq	%rsi,%rsi		# stash carry again

	movl	$8,%ecx
	jmp	L$8x_tail

.p2align	5
L$8x_tail_done:
	xorq	%rax,%rax
	addq	(%rdx),%r8		# add back the carry word parked earlier
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	adcq	$0,%rax			# %rax = propagated top carry

	negq	%rsi			# re-inject stashed carry
L$8x_no_tail:
	adcq	0(%rdi),%r8		# final accumulation into the result window
	adcq	8(%rdi),%r9
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax			# top carry of this window
	movq	-8(%rbp),%rcx		# NOTE(review): last modulus word - confirm use
	xorq	%rsi,%rsi

.byte	102,72,15,126,213		# movq %xmm2,%rbp (restore pointer saved in xmm2)

	movq	%r8,0(%rdi)		# store the reduced 8-word window
	movq	%r9,8(%rdi)
.byte	102,73,15,126,217		# movq %xmm3,%r9 (restore value saved in xmm3)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)
	leaq	64(%rdi),%rdi

	cmpq	%rdx,%rdi
	jb	L$8x_reduction_loop	# more windows to reduce
	.byte	0xf3,0xc3		# rep ret


# ---------------------------------------------------------------------
# __bn_post4x_internal: final conditional subtraction for the sqr/mul
# paths.  Computes t + (~n & mask) + 1-carry chain, i.e. t - n exactly
# when the mask in %rax is all-ones, t unchanged when the mask is zero.
# Constant-time: both cases execute the identical instruction stream.
# ---------------------------------------------------------------------
.p2align	5
__bn_post4x_internal:
	movq	0(%rbp),%r12
	leaq	(%rdi,%r9,1),%rbx	# %rbx = start of source window (%r9 < 0)
	movq	%r9,%rcx
.byte	102,72,15,126,207		# movq %xmm1,%rdi (restore output pointer)
	negq	%rax			# turn borrow flag into carry seed
.byte	102,72,15,126,206		# movq %xmm1,%rsi
	sarq	$3+2,%rcx		# %rcx = signed count of 32-byte groups
	decq	%r12			# defer first load: already in %r12
	xorq	%r10,%r10
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	L$sqr4x_sub_entry

.p2align	4
L$sqr4x_sub:
	movq	0(%rbp),%r12		# next four modulus words
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
L$sqr4x_sub_entry:
	leaq	32(%rbp),%rbp
	notq	%r12			# masked complement: ~n & mask
	notq	%r13
	notq	%r14
	notq	%r15
	andq	%rax,%r12
	andq	%rax,%r13
	andq	%rax,%r14
	andq	%rax,%r15

	negq	%r10			# restore carry from previous group
	adcq	0(%rbx),%r12		# t + (~n & mask) with running carry
	adcq	8(%rbx),%r13
	adcq	16(%rbx),%r14
	adcq	24(%rbx),%r15
	movq	%r12,0(%rdi)
	leaq	32(%rbx),%rbx
	movq	%r13,8(%rdi)
	sbbq	%r10,%r10		# stash carry for the next group
	movq	%r14,16(%rdi)
	movq	%r15,24(%rdi)
	leaq	32(%rdi),%rdi

	incq	%rcx			# %rcx counts up from a negative value
	jnz	L$sqr4x_sub

	movq	%r9,%r10
	negq	%r9			# restore positive word count
	.byte	0xf3,0xc3		# rep ret


.globl	_bn_from_montgomery
.private_extern	_bn_from_montgomery

# _bn_from_montgomery: public dispatcher.  Only lengths divisible by 8
# words are handled (by bn_from_mont8x); otherwise return 0 in %eax so
# the caller falls back to another code path.
.p2align	5
_bn_from_montgomery:
	testl	$7,%r9d
	jz	bn_from_mont8x
	xorl	%eax,%eax		# unsupported length: return 0
	.byte	0xf3,0xc3		# rep ret



.p2align	5
bn_from_mont8x:

.byte
0x67 2087 movq %rsp,%rax 2088 2089 pushq %rbx 2090 2091 pushq %rbp 2092 2093 pushq %r12 2094 2095 pushq %r13 2096 2097 pushq %r14 2098 2099 pushq %r15 2100 2101L$from_prologue: 2102 2103 shll $3,%r9d 2104 leaq (%r9,%r9,2),%r10 2105 negq %r9 2106 movq (%r8),%r8 2107 2108 2109 2110 2111 2112 2113 2114 2115 leaq -320(%rsp,%r9,2),%r11 2116 movq %rsp,%rbp 2117 subq %rdi,%r11 2118 andq $4095,%r11 2119 cmpq %r11,%r10 2120 jb L$from_sp_alt 2121 subq %r11,%rbp 2122 leaq -320(%rbp,%r9,2),%rbp 2123 jmp L$from_sp_done 2124 2125.p2align 5 2126L$from_sp_alt: 2127 leaq 4096-320(,%r9,2),%r10 2128 leaq -320(%rbp,%r9,2),%rbp 2129 subq %r10,%r11 2130 movq $0,%r10 2131 cmovcq %r10,%r11 2132 subq %r11,%rbp 2133L$from_sp_done: 2134 andq $-64,%rbp 2135 movq %rsp,%r11 2136 subq %rbp,%r11 2137 andq $-4096,%r11 2138 leaq (%r11,%rbp,1),%rsp 2139 movq (%rsp),%r10 2140 cmpq %rbp,%rsp 2141 ja L$from_page_walk 2142 jmp L$from_page_walk_done 2143 2144L$from_page_walk: 2145 leaq -4096(%rsp),%rsp 2146 movq (%rsp),%r10 2147 cmpq %rbp,%rsp 2148 ja L$from_page_walk 2149L$from_page_walk_done: 2150 2151 movq %r9,%r10 2152 negq %r9 2153 2154 2155 2156 2157 2158 2159 2160 2161 2162 2163 movq %r8,32(%rsp) 2164 movq %rax,40(%rsp) 2165 2166L$from_body: 2167 movq %r9,%r11 2168 leaq 48(%rsp),%rax 2169 pxor %xmm0,%xmm0 2170 jmp L$mul_by_1 2171 2172.p2align 5 2173L$mul_by_1: 2174 movdqu (%rsi),%xmm1 2175 movdqu 16(%rsi),%xmm2 2176 movdqu 32(%rsi),%xmm3 2177 movdqa %xmm0,(%rax,%r9,1) 2178 movdqu 48(%rsi),%xmm4 2179 movdqa %xmm0,16(%rax,%r9,1) 2180.byte 0x48,0x8d,0xb6,0x40,0x00,0x00,0x00 2181 movdqa %xmm1,(%rax) 2182 movdqa %xmm0,32(%rax,%r9,1) 2183 movdqa %xmm2,16(%rax) 2184 movdqa %xmm0,48(%rax,%r9,1) 2185 movdqa %xmm3,32(%rax) 2186 movdqa %xmm4,48(%rax) 2187 leaq 64(%rax),%rax 2188 subq $64,%r11 2189 jnz L$mul_by_1 2190 2191.byte 102,72,15,110,207 2192.byte 102,72,15,110,209 2193.byte 0x67 2194 movq %rcx,%rbp 2195.byte 102,73,15,110,218 2196 leaq _OPENSSL_ia32cap_P(%rip),%r11 2197 movl 8(%r11),%r11d 2198 andl 
$0x80108,%r11d 2199 cmpl $0x80108,%r11d 2200 jne L$from_mont_nox 2201 2202 leaq (%rax,%r9,1),%rdi 2203 call __bn_sqrx8x_reduction 2204 call __bn_postx4x_internal 2205 2206 pxor %xmm0,%xmm0 2207 leaq 48(%rsp),%rax 2208 jmp L$from_mont_zero 2209 2210.p2align 5 2211L$from_mont_nox: 2212 call __bn_sqr8x_reduction 2213 call __bn_post4x_internal 2214 2215 pxor %xmm0,%xmm0 2216 leaq 48(%rsp),%rax 2217 jmp L$from_mont_zero 2218 2219.p2align 5 2220L$from_mont_zero: 2221 movq 40(%rsp),%rsi 2222 2223 movdqa %xmm0,0(%rax) 2224 movdqa %xmm0,16(%rax) 2225 movdqa %xmm0,32(%rax) 2226 movdqa %xmm0,48(%rax) 2227 leaq 64(%rax),%rax 2228 subq $32,%r9 2229 jnz L$from_mont_zero 2230 2231 movq $1,%rax 2232 movq -48(%rsi),%r15 2233 2234 movq -40(%rsi),%r14 2235 2236 movq -32(%rsi),%r13 2237 2238 movq -24(%rsi),%r12 2239 2240 movq -16(%rsi),%rbp 2241 2242 movq -8(%rsi),%rbx 2243 2244 leaq (%rsi),%rsp 2245 2246L$from_epilogue: 2247 .byte 0xf3,0xc3 2248 2249 2250 2251.p2align 5 2252bn_mulx4x_mont_gather5: 2253 2254 movq %rsp,%rax 2255 2256L$mulx4x_enter: 2257 pushq %rbx 2258 2259 pushq %rbp 2260 2261 pushq %r12 2262 2263 pushq %r13 2264 2265 pushq %r14 2266 2267 pushq %r15 2268 2269L$mulx4x_prologue: 2270 2271 shll $3,%r9d 2272 leaq (%r9,%r9,2),%r10 2273 negq %r9 2274 movq (%r8),%r8 2275 2276 2277 2278 2279 2280 2281 2282 2283 2284 2285 leaq -320(%rsp,%r9,2),%r11 2286 movq %rsp,%rbp 2287 subq %rdi,%r11 2288 andq $4095,%r11 2289 cmpq %r11,%r10 2290 jb L$mulx4xsp_alt 2291 subq %r11,%rbp 2292 leaq -320(%rbp,%r9,2),%rbp 2293 jmp L$mulx4xsp_done 2294 2295L$mulx4xsp_alt: 2296 leaq 4096-320(,%r9,2),%r10 2297 leaq -320(%rbp,%r9,2),%rbp 2298 subq %r10,%r11 2299 movq $0,%r10 2300 cmovcq %r10,%r11 2301 subq %r11,%rbp 2302L$mulx4xsp_done: 2303 andq $-64,%rbp 2304 movq %rsp,%r11 2305 subq %rbp,%r11 2306 andq $-4096,%r11 2307 leaq (%r11,%rbp,1),%rsp 2308 movq (%rsp),%r10 2309 cmpq %rbp,%rsp 2310 ja L$mulx4x_page_walk 2311 jmp L$mulx4x_page_walk_done 2312 2313L$mulx4x_page_walk: 2314 leaq -4096(%rsp),%rsp 
2315 movq (%rsp),%r10 2316 cmpq %rbp,%rsp 2317 ja L$mulx4x_page_walk 2318L$mulx4x_page_walk_done: 2319 2320 2321 2322 2323 2324 2325 2326 2327 2328 2329 2330 2331 2332 movq %r8,32(%rsp) 2333 movq %rax,40(%rsp) 2334 2335L$mulx4x_body: 2336 call mulx4x_internal 2337 2338 movq 40(%rsp),%rsi 2339 2340 movq $1,%rax 2341 2342 movq -48(%rsi),%r15 2343 2344 movq -40(%rsi),%r14 2345 2346 movq -32(%rsi),%r13 2347 2348 movq -24(%rsi),%r12 2349 2350 movq -16(%rsi),%rbp 2351 2352 movq -8(%rsi),%rbx 2353 2354 leaq (%rsi),%rsp 2355 2356L$mulx4x_epilogue: 2357 .byte 0xf3,0xc3 2358 2359 2360 2361 2362.p2align 5 2363mulx4x_internal: 2364 2365 movq %r9,8(%rsp) 2366 movq %r9,%r10 2367 negq %r9 2368 shlq $5,%r9 2369 negq %r10 2370 leaq 128(%rdx,%r9,1),%r13 2371 shrq $5+5,%r9 2372 movd 8(%rax),%xmm5 2373 subq $1,%r9 2374 leaq L$inc(%rip),%rax 2375 movq %r13,16+8(%rsp) 2376 movq %r9,24+8(%rsp) 2377 movq %rdi,56+8(%rsp) 2378 movdqa 0(%rax),%xmm0 2379 movdqa 16(%rax),%xmm1 2380 leaq 88-112(%rsp,%r10,1),%r10 2381 leaq 128(%rdx),%rdi 2382 2383 pshufd $0,%xmm5,%xmm5 2384 movdqa %xmm1,%xmm4 2385.byte 0x67 2386 movdqa %xmm1,%xmm2 2387.byte 0x67 2388 paddd %xmm0,%xmm1 2389 pcmpeqd %xmm5,%xmm0 2390 movdqa %xmm4,%xmm3 2391 paddd %xmm1,%xmm2 2392 pcmpeqd %xmm5,%xmm1 2393 movdqa %xmm0,112(%r10) 2394 movdqa %xmm4,%xmm0 2395 2396 paddd %xmm2,%xmm3 2397 pcmpeqd %xmm5,%xmm2 2398 movdqa %xmm1,128(%r10) 2399 movdqa %xmm4,%xmm1 2400 2401 paddd %xmm3,%xmm0 2402 pcmpeqd %xmm5,%xmm3 2403 movdqa %xmm2,144(%r10) 2404 movdqa %xmm4,%xmm2 2405 2406 paddd %xmm0,%xmm1 2407 pcmpeqd %xmm5,%xmm0 2408 movdqa %xmm3,160(%r10) 2409 movdqa %xmm4,%xmm3 2410 paddd %xmm1,%xmm2 2411 pcmpeqd %xmm5,%xmm1 2412 movdqa %xmm0,176(%r10) 2413 movdqa %xmm4,%xmm0 2414 2415 paddd %xmm2,%xmm3 2416 pcmpeqd %xmm5,%xmm2 2417 movdqa %xmm1,192(%r10) 2418 movdqa %xmm4,%xmm1 2419 2420 paddd %xmm3,%xmm0 2421 pcmpeqd %xmm5,%xmm3 2422 movdqa %xmm2,208(%r10) 2423 movdqa %xmm4,%xmm2 2424 2425 paddd %xmm0,%xmm1 2426 pcmpeqd %xmm5,%xmm0 2427 movdqa 
%xmm3,224(%r10) 2428 movdqa %xmm4,%xmm3 2429 paddd %xmm1,%xmm2 2430 pcmpeqd %xmm5,%xmm1 2431 movdqa %xmm0,240(%r10) 2432 movdqa %xmm4,%xmm0 2433 2434 paddd %xmm2,%xmm3 2435 pcmpeqd %xmm5,%xmm2 2436 movdqa %xmm1,256(%r10) 2437 movdqa %xmm4,%xmm1 2438 2439 paddd %xmm3,%xmm0 2440 pcmpeqd %xmm5,%xmm3 2441 movdqa %xmm2,272(%r10) 2442 movdqa %xmm4,%xmm2 2443 2444 paddd %xmm0,%xmm1 2445 pcmpeqd %xmm5,%xmm0 2446 movdqa %xmm3,288(%r10) 2447 movdqa %xmm4,%xmm3 2448.byte 0x67 2449 paddd %xmm1,%xmm2 2450 pcmpeqd %xmm5,%xmm1 2451 movdqa %xmm0,304(%r10) 2452 2453 paddd %xmm2,%xmm3 2454 pcmpeqd %xmm5,%xmm2 2455 movdqa %xmm1,320(%r10) 2456 2457 pcmpeqd %xmm5,%xmm3 2458 movdqa %xmm2,336(%r10) 2459 2460 pand 64(%rdi),%xmm0 2461 pand 80(%rdi),%xmm1 2462 pand 96(%rdi),%xmm2 2463 movdqa %xmm3,352(%r10) 2464 pand 112(%rdi),%xmm3 2465 por %xmm2,%xmm0 2466 por %xmm3,%xmm1 2467 movdqa -128(%rdi),%xmm4 2468 movdqa -112(%rdi),%xmm5 2469 movdqa -96(%rdi),%xmm2 2470 pand 112(%r10),%xmm4 2471 movdqa -80(%rdi),%xmm3 2472 pand 128(%r10),%xmm5 2473 por %xmm4,%xmm0 2474 pand 144(%r10),%xmm2 2475 por %xmm5,%xmm1 2476 pand 160(%r10),%xmm3 2477 por %xmm2,%xmm0 2478 por %xmm3,%xmm1 2479 movdqa -64(%rdi),%xmm4 2480 movdqa -48(%rdi),%xmm5 2481 movdqa -32(%rdi),%xmm2 2482 pand 176(%r10),%xmm4 2483 movdqa -16(%rdi),%xmm3 2484 pand 192(%r10),%xmm5 2485 por %xmm4,%xmm0 2486 pand 208(%r10),%xmm2 2487 por %xmm5,%xmm1 2488 pand 224(%r10),%xmm3 2489 por %xmm2,%xmm0 2490 por %xmm3,%xmm1 2491 movdqa 0(%rdi),%xmm4 2492 movdqa 16(%rdi),%xmm5 2493 movdqa 32(%rdi),%xmm2 2494 pand 240(%r10),%xmm4 2495 movdqa 48(%rdi),%xmm3 2496 pand 256(%r10),%xmm5 2497 por %xmm4,%xmm0 2498 pand 272(%r10),%xmm2 2499 por %xmm5,%xmm1 2500 pand 288(%r10),%xmm3 2501 por %xmm2,%xmm0 2502 por %xmm3,%xmm1 2503 pxor %xmm1,%xmm0 2504 pshufd $0x4e,%xmm0,%xmm1 2505 por %xmm1,%xmm0 2506 leaq 256(%rdi),%rdi 2507.byte 102,72,15,126,194 2508 leaq 64+32+8(%rsp),%rbx 2509 2510 movq %rdx,%r9 2511 mulxq 0(%rsi),%r8,%rax 2512 mulxq 8(%rsi),%r11,%r12 2513 
addq %rax,%r11 2514 mulxq 16(%rsi),%rax,%r13 2515 adcq %rax,%r12 2516 adcq $0,%r13 2517 mulxq 24(%rsi),%rax,%r14 2518 2519 movq %r8,%r15 2520 imulq 32+8(%rsp),%r8 2521 xorq %rbp,%rbp 2522 movq %r8,%rdx 2523 2524 movq %rdi,8+8(%rsp) 2525 2526 leaq 32(%rsi),%rsi 2527 adcxq %rax,%r13 2528 adcxq %rbp,%r14 2529 2530 mulxq 0(%rcx),%rax,%r10 2531 adcxq %rax,%r15 2532 adoxq %r11,%r10 2533 mulxq 8(%rcx),%rax,%r11 2534 adcxq %rax,%r10 2535 adoxq %r12,%r11 2536 mulxq 16(%rcx),%rax,%r12 2537 movq 24+8(%rsp),%rdi 2538 movq %r10,-32(%rbx) 2539 adcxq %rax,%r11 2540 adoxq %r13,%r12 2541 mulxq 24(%rcx),%rax,%r15 2542 movq %r9,%rdx 2543 movq %r11,-24(%rbx) 2544 adcxq %rax,%r12 2545 adoxq %rbp,%r15 2546 leaq 32(%rcx),%rcx 2547 movq %r12,-16(%rbx) 2548 jmp L$mulx4x_1st 2549 2550.p2align 5 2551L$mulx4x_1st: 2552 adcxq %rbp,%r15 2553 mulxq 0(%rsi),%r10,%rax 2554 adcxq %r14,%r10 2555 mulxq 8(%rsi),%r11,%r14 2556 adcxq %rax,%r11 2557 mulxq 16(%rsi),%r12,%rax 2558 adcxq %r14,%r12 2559 mulxq 24(%rsi),%r13,%r14 2560.byte 0x67,0x67 2561 movq %r8,%rdx 2562 adcxq %rax,%r13 2563 adcxq %rbp,%r14 2564 leaq 32(%rsi),%rsi 2565 leaq 32(%rbx),%rbx 2566 2567 adoxq %r15,%r10 2568 mulxq 0(%rcx),%rax,%r15 2569 adcxq %rax,%r10 2570 adoxq %r15,%r11 2571 mulxq 8(%rcx),%rax,%r15 2572 adcxq %rax,%r11 2573 adoxq %r15,%r12 2574 mulxq 16(%rcx),%rax,%r15 2575 movq %r10,-40(%rbx) 2576 adcxq %rax,%r12 2577 movq %r11,-32(%rbx) 2578 adoxq %r15,%r13 2579 mulxq 24(%rcx),%rax,%r15 2580 movq %r9,%rdx 2581 movq %r12,-24(%rbx) 2582 adcxq %rax,%r13 2583 adoxq %rbp,%r15 2584 leaq 32(%rcx),%rcx 2585 movq %r13,-16(%rbx) 2586 2587 decq %rdi 2588 jnz L$mulx4x_1st 2589 2590 movq 8(%rsp),%rax 2591 adcq %rbp,%r15 2592 leaq (%rsi,%rax,1),%rsi 2593 addq %r15,%r14 2594 movq 8+8(%rsp),%rdi 2595 adcq %rbp,%rbp 2596 movq %r14,-8(%rbx) 2597 jmp L$mulx4x_outer 2598 2599.p2align 5 2600L$mulx4x_outer: 2601 leaq 16-256(%rbx),%r10 2602 pxor %xmm4,%xmm4 2603.byte 0x67,0x67 2604 pxor %xmm5,%xmm5 2605 movdqa -128(%rdi),%xmm0 2606 movdqa 
-112(%rdi),%xmm1 2607 movdqa -96(%rdi),%xmm2 2608 pand 256(%r10),%xmm0 2609 movdqa -80(%rdi),%xmm3 2610 pand 272(%r10),%xmm1 2611 por %xmm0,%xmm4 2612 pand 288(%r10),%xmm2 2613 por %xmm1,%xmm5 2614 pand 304(%r10),%xmm3 2615 por %xmm2,%xmm4 2616 por %xmm3,%xmm5 2617 movdqa -64(%rdi),%xmm0 2618 movdqa -48(%rdi),%xmm1 2619 movdqa -32(%rdi),%xmm2 2620 pand 320(%r10),%xmm0 2621 movdqa -16(%rdi),%xmm3 2622 pand 336(%r10),%xmm1 2623 por %xmm0,%xmm4 2624 pand 352(%r10),%xmm2 2625 por %xmm1,%xmm5 2626 pand 368(%r10),%xmm3 2627 por %xmm2,%xmm4 2628 por %xmm3,%xmm5 2629 movdqa 0(%rdi),%xmm0 2630 movdqa 16(%rdi),%xmm1 2631 movdqa 32(%rdi),%xmm2 2632 pand 384(%r10),%xmm0 2633 movdqa 48(%rdi),%xmm3 2634 pand 400(%r10),%xmm1 2635 por %xmm0,%xmm4 2636 pand 416(%r10),%xmm2 2637 por %xmm1,%xmm5 2638 pand 432(%r10),%xmm3 2639 por %xmm2,%xmm4 2640 por %xmm3,%xmm5 2641 movdqa 64(%rdi),%xmm0 2642 movdqa 80(%rdi),%xmm1 2643 movdqa 96(%rdi),%xmm2 2644 pand 448(%r10),%xmm0 2645 movdqa 112(%rdi),%xmm3 2646 pand 464(%r10),%xmm1 2647 por %xmm0,%xmm4 2648 pand 480(%r10),%xmm2 2649 por %xmm1,%xmm5 2650 pand 496(%r10),%xmm3 2651 por %xmm2,%xmm4 2652 por %xmm3,%xmm5 2653 por %xmm5,%xmm4 2654 pshufd $0x4e,%xmm4,%xmm0 2655 por %xmm4,%xmm0 2656 leaq 256(%rdi),%rdi 2657.byte 102,72,15,126,194 2658 2659 movq %rbp,(%rbx) 2660 leaq 32(%rbx,%rax,1),%rbx 2661 mulxq 0(%rsi),%r8,%r11 2662 xorq %rbp,%rbp 2663 movq %rdx,%r9 2664 mulxq 8(%rsi),%r14,%r12 2665 adoxq -32(%rbx),%r8 2666 adcxq %r14,%r11 2667 mulxq 16(%rsi),%r15,%r13 2668 adoxq -24(%rbx),%r11 2669 adcxq %r15,%r12 2670 mulxq 24(%rsi),%rdx,%r14 2671 adoxq -16(%rbx),%r12 2672 adcxq %rdx,%r13 2673 leaq (%rcx,%rax,1),%rcx 2674 leaq 32(%rsi),%rsi 2675 adoxq -8(%rbx),%r13 2676 adcxq %rbp,%r14 2677 adoxq %rbp,%r14 2678 2679 movq %r8,%r15 2680 imulq 32+8(%rsp),%r8 2681 2682 movq %r8,%rdx 2683 xorq %rbp,%rbp 2684 movq %rdi,8+8(%rsp) 2685 2686 mulxq 0(%rcx),%rax,%r10 2687 adcxq %rax,%r15 2688 adoxq %r11,%r10 2689 mulxq 8(%rcx),%rax,%r11 2690 adcxq %rax,%r10 
2691 adoxq %r12,%r11 2692 mulxq 16(%rcx),%rax,%r12 2693 adcxq %rax,%r11 2694 adoxq %r13,%r12 2695 mulxq 24(%rcx),%rax,%r15 2696 movq %r9,%rdx 2697 movq 24+8(%rsp),%rdi 2698 movq %r10,-32(%rbx) 2699 adcxq %rax,%r12 2700 movq %r11,-24(%rbx) 2701 adoxq %rbp,%r15 2702 movq %r12,-16(%rbx) 2703 leaq 32(%rcx),%rcx 2704 jmp L$mulx4x_inner 2705 2706.p2align 5 2707L$mulx4x_inner: 2708 mulxq 0(%rsi),%r10,%rax 2709 adcxq %rbp,%r15 2710 adoxq %r14,%r10 2711 mulxq 8(%rsi),%r11,%r14 2712 adcxq 0(%rbx),%r10 2713 adoxq %rax,%r11 2714 mulxq 16(%rsi),%r12,%rax 2715 adcxq 8(%rbx),%r11 2716 adoxq %r14,%r12 2717 mulxq 24(%rsi),%r13,%r14 2718 movq %r8,%rdx 2719 adcxq 16(%rbx),%r12 2720 adoxq %rax,%r13 2721 adcxq 24(%rbx),%r13 2722 adoxq %rbp,%r14 2723 leaq 32(%rsi),%rsi 2724 leaq 32(%rbx),%rbx 2725 adcxq %rbp,%r14 2726 2727 adoxq %r15,%r10 2728 mulxq 0(%rcx),%rax,%r15 2729 adcxq %rax,%r10 2730 adoxq %r15,%r11 2731 mulxq 8(%rcx),%rax,%r15 2732 adcxq %rax,%r11 2733 adoxq %r15,%r12 2734 mulxq 16(%rcx),%rax,%r15 2735 movq %r10,-40(%rbx) 2736 adcxq %rax,%r12 2737 adoxq %r15,%r13 2738 movq %r11,-32(%rbx) 2739 mulxq 24(%rcx),%rax,%r15 2740 movq %r9,%rdx 2741 leaq 32(%rcx),%rcx 2742 movq %r12,-24(%rbx) 2743 adcxq %rax,%r13 2744 adoxq %rbp,%r15 2745 movq %r13,-16(%rbx) 2746 2747 decq %rdi 2748 jnz L$mulx4x_inner 2749 2750 movq 0+8(%rsp),%rax 2751 adcq %rbp,%r15 2752 subq 0(%rbx),%rdi 2753 movq 8+8(%rsp),%rdi 2754 movq 16+8(%rsp),%r10 2755 adcq %r15,%r14 2756 leaq (%rsi,%rax,1),%rsi 2757 adcq %rbp,%rbp 2758 movq %r14,-8(%rbx) 2759 2760 cmpq %r10,%rdi 2761 jb L$mulx4x_outer 2762 2763 movq -8(%rcx),%r10 2764 movq %rbp,%r8 2765 movq (%rcx,%rax,1),%r12 2766 leaq (%rcx,%rax,1),%rbp 2767 movq %rax,%rcx 2768 leaq (%rbx,%rax,1),%rdi 2769 xorl %eax,%eax 2770 xorq %r15,%r15 2771 subq %r14,%r10 2772 adcq %r15,%r15 2773 orq %r15,%r8 2774 sarq $3+2,%rcx 2775 subq %r8,%rax 2776 movq 56+8(%rsp),%rdx 2777 decq %r12 2778 movq 8(%rbp),%r13 2779 xorq %r8,%r8 2780 movq 16(%rbp),%r14 2781 movq 24(%rbp),%r15 2782 jmp 
L$sqrx4x_sub_entry 2783 2784 2785 2786.p2align 5 2787bn_powerx5: 2788 2789 movq %rsp,%rax 2790 2791L$powerx5_enter: 2792 pushq %rbx 2793 2794 pushq %rbp 2795 2796 pushq %r12 2797 2798 pushq %r13 2799 2800 pushq %r14 2801 2802 pushq %r15 2803 2804L$powerx5_prologue: 2805 2806 shll $3,%r9d 2807 leaq (%r9,%r9,2),%r10 2808 negq %r9 2809 movq (%r8),%r8 2810 2811 2812 2813 2814 2815 2816 2817 2818 leaq -320(%rsp,%r9,2),%r11 2819 movq %rsp,%rbp 2820 subq %rdi,%r11 2821 andq $4095,%r11 2822 cmpq %r11,%r10 2823 jb L$pwrx_sp_alt 2824 subq %r11,%rbp 2825 leaq -320(%rbp,%r9,2),%rbp 2826 jmp L$pwrx_sp_done 2827 2828.p2align 5 2829L$pwrx_sp_alt: 2830 leaq 4096-320(,%r9,2),%r10 2831 leaq -320(%rbp,%r9,2),%rbp 2832 subq %r10,%r11 2833 movq $0,%r10 2834 cmovcq %r10,%r11 2835 subq %r11,%rbp 2836L$pwrx_sp_done: 2837 andq $-64,%rbp 2838 movq %rsp,%r11 2839 subq %rbp,%r11 2840 andq $-4096,%r11 2841 leaq (%r11,%rbp,1),%rsp 2842 movq (%rsp),%r10 2843 cmpq %rbp,%rsp 2844 ja L$pwrx_page_walk 2845 jmp L$pwrx_page_walk_done 2846 2847L$pwrx_page_walk: 2848 leaq -4096(%rsp),%rsp 2849 movq (%rsp),%r10 2850 cmpq %rbp,%rsp 2851 ja L$pwrx_page_walk 2852L$pwrx_page_walk_done: 2853 2854 movq %r9,%r10 2855 negq %r9 2856 2857 2858 2859 2860 2861 2862 2863 2864 2865 2866 2867 2868 pxor %xmm0,%xmm0 2869.byte 102,72,15,110,207 2870.byte 102,72,15,110,209 2871.byte 102,73,15,110,218 2872.byte 102,72,15,110,226 2873 movq %r8,32(%rsp) 2874 movq %rax,40(%rsp) 2875 2876L$powerx5_body: 2877 2878 call __bn_sqrx8x_internal 2879 call __bn_postx4x_internal 2880 call __bn_sqrx8x_internal 2881 call __bn_postx4x_internal 2882 call __bn_sqrx8x_internal 2883 call __bn_postx4x_internal 2884 call __bn_sqrx8x_internal 2885 call __bn_postx4x_internal 2886 call __bn_sqrx8x_internal 2887 call __bn_postx4x_internal 2888 2889 movq %r10,%r9 2890 movq %rsi,%rdi 2891.byte 102,72,15,126,209 2892.byte 102,72,15,126,226 2893 movq 40(%rsp),%rax 2894 2895 call mulx4x_internal 2896 2897 movq 40(%rsp),%rsi 2898 2899 movq $1,%rax 2900 
2901 movq -48(%rsi),%r15 2902 2903 movq -40(%rsi),%r14 2904 2905 movq -32(%rsi),%r13 2906 2907 movq -24(%rsi),%r12 2908 2909 movq -16(%rsi),%rbp 2910 2911 movq -8(%rsi),%rbx 2912 2913 leaq (%rsi),%rsp 2914 2915L$powerx5_epilogue: 2916 .byte 0xf3,0xc3 2917 2918 2919 2920.globl _bn_sqrx8x_internal 2921.private_extern _bn_sqrx8x_internal 2922.private_extern _bn_sqrx8x_internal 2923 2924.p2align 5 2925_bn_sqrx8x_internal: 2926__bn_sqrx8x_internal: 2927 2928 2929 2930 2931 2932 2933 2934 2935 2936 2937 2938 2939 2940 2941 2942 2943 2944 2945 2946 2947 2948 2949 2950 2951 2952 2953 2954 2955 2956 2957 2958 2959 2960 2961 2962 2963 2964 2965 2966 2967 2968 leaq 48+8(%rsp),%rdi 2969 leaq (%rsi,%r9,1),%rbp 2970 movq %r9,0+8(%rsp) 2971 movq %rbp,8+8(%rsp) 2972 jmp L$sqr8x_zero_start 2973 2974.p2align 5 2975.byte 0x66,0x66,0x66,0x2e,0x0f,0x1f,0x84,0x00,0x00,0x00,0x00,0x00 2976L$sqrx8x_zero: 2977.byte 0x3e 2978 movdqa %xmm0,0(%rdi) 2979 movdqa %xmm0,16(%rdi) 2980 movdqa %xmm0,32(%rdi) 2981 movdqa %xmm0,48(%rdi) 2982L$sqr8x_zero_start: 2983 movdqa %xmm0,64(%rdi) 2984 movdqa %xmm0,80(%rdi) 2985 movdqa %xmm0,96(%rdi) 2986 movdqa %xmm0,112(%rdi) 2987 leaq 128(%rdi),%rdi 2988 subq $64,%r9 2989 jnz L$sqrx8x_zero 2990 2991 movq 0(%rsi),%rdx 2992 2993 xorq %r10,%r10 2994 xorq %r11,%r11 2995 xorq %r12,%r12 2996 xorq %r13,%r13 2997 xorq %r14,%r14 2998 xorq %r15,%r15 2999 leaq 48+8(%rsp),%rdi 3000 xorq %rbp,%rbp 3001 jmp L$sqrx8x_outer_loop 3002 3003.p2align 5 3004L$sqrx8x_outer_loop: 3005 mulxq 8(%rsi),%r8,%rax 3006 adcxq %r9,%r8 3007 adoxq %rax,%r10 3008 mulxq 16(%rsi),%r9,%rax 3009 adcxq %r10,%r9 3010 adoxq %rax,%r11 3011.byte 0xc4,0xe2,0xab,0xf6,0x86,0x18,0x00,0x00,0x00 3012 adcxq %r11,%r10 3013 adoxq %rax,%r12 3014.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x20,0x00,0x00,0x00 3015 adcxq %r12,%r11 3016 adoxq %rax,%r13 3017 mulxq 40(%rsi),%r12,%rax 3018 adcxq %r13,%r12 3019 adoxq %rax,%r14 3020 mulxq 48(%rsi),%r13,%rax 3021 adcxq %r14,%r13 3022 adoxq %r15,%rax 3023 mulxq 56(%rsi),%r14,%r15 3024 
movq 8(%rsi),%rdx 3025 adcxq %rax,%r14 3026 adoxq %rbp,%r15 3027 adcq 64(%rdi),%r15 3028 movq %r8,8(%rdi) 3029 movq %r9,16(%rdi) 3030 sbbq %rcx,%rcx 3031 xorq %rbp,%rbp 3032 3033 3034 mulxq 16(%rsi),%r8,%rbx 3035 mulxq 24(%rsi),%r9,%rax 3036 adcxq %r10,%r8 3037 adoxq %rbx,%r9 3038 mulxq 32(%rsi),%r10,%rbx 3039 adcxq %r11,%r9 3040 adoxq %rax,%r10 3041.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x28,0x00,0x00,0x00 3042 adcxq %r12,%r10 3043 adoxq %rbx,%r11 3044.byte 0xc4,0xe2,0x9b,0xf6,0x9e,0x30,0x00,0x00,0x00 3045 adcxq %r13,%r11 3046 adoxq %r14,%r12 3047.byte 0xc4,0x62,0x93,0xf6,0xb6,0x38,0x00,0x00,0x00 3048 movq 16(%rsi),%rdx 3049 adcxq %rax,%r12 3050 adoxq %rbx,%r13 3051 adcxq %r15,%r13 3052 adoxq %rbp,%r14 3053 adcxq %rbp,%r14 3054 3055 movq %r8,24(%rdi) 3056 movq %r9,32(%rdi) 3057 3058 mulxq 24(%rsi),%r8,%rbx 3059 mulxq 32(%rsi),%r9,%rax 3060 adcxq %r10,%r8 3061 adoxq %rbx,%r9 3062 mulxq 40(%rsi),%r10,%rbx 3063 adcxq %r11,%r9 3064 adoxq %rax,%r10 3065.byte 0xc4,0xe2,0xa3,0xf6,0x86,0x30,0x00,0x00,0x00 3066 adcxq %r12,%r10 3067 adoxq %r13,%r11 3068.byte 0xc4,0x62,0x9b,0xf6,0xae,0x38,0x00,0x00,0x00 3069.byte 0x3e 3070 movq 24(%rsi),%rdx 3071 adcxq %rbx,%r11 3072 adoxq %rax,%r12 3073 adcxq %r14,%r12 3074 movq %r8,40(%rdi) 3075 movq %r9,48(%rdi) 3076 mulxq 32(%rsi),%r8,%rax 3077 adoxq %rbp,%r13 3078 adcxq %rbp,%r13 3079 3080 mulxq 40(%rsi),%r9,%rbx 3081 adcxq %r10,%r8 3082 adoxq %rax,%r9 3083 mulxq 48(%rsi),%r10,%rax 3084 adcxq %r11,%r9 3085 adoxq %r12,%r10 3086 mulxq 56(%rsi),%r11,%r12 3087 movq 32(%rsi),%rdx 3088 movq 40(%rsi),%r14 3089 adcxq %rbx,%r10 3090 adoxq %rax,%r11 3091 movq 48(%rsi),%r15 3092 adcxq %r13,%r11 3093 adoxq %rbp,%r12 3094 adcxq %rbp,%r12 3095 3096 movq %r8,56(%rdi) 3097 movq %r9,64(%rdi) 3098 3099 mulxq %r14,%r9,%rax 3100 movq 56(%rsi),%r8 3101 adcxq %r10,%r9 3102 mulxq %r15,%r10,%rbx 3103 adoxq %rax,%r10 3104 adcxq %r11,%r10 3105 mulxq %r8,%r11,%rax 3106 movq %r14,%rdx 3107 adoxq %rbx,%r11 3108 adcxq %r12,%r11 3109 3110 adcxq %rbp,%rax 3111 3112 mulxq 
%r15,%r14,%rbx 3113 mulxq %r8,%r12,%r13 3114 movq %r15,%rdx 3115 leaq 64(%rsi),%rsi 3116 adcxq %r14,%r11 3117 adoxq %rbx,%r12 3118 adcxq %rax,%r12 3119 adoxq %rbp,%r13 3120 3121.byte 0x67,0x67 3122 mulxq %r8,%r8,%r14 3123 adcxq %r8,%r13 3124 adcxq %rbp,%r14 3125 3126 cmpq 8+8(%rsp),%rsi 3127 je L$sqrx8x_outer_break 3128 3129 negq %rcx 3130 movq $-8,%rcx 3131 movq %rbp,%r15 3132 movq 64(%rdi),%r8 3133 adcxq 72(%rdi),%r9 3134 adcxq 80(%rdi),%r10 3135 adcxq 88(%rdi),%r11 3136 adcq 96(%rdi),%r12 3137 adcq 104(%rdi),%r13 3138 adcq 112(%rdi),%r14 3139 adcq 120(%rdi),%r15 3140 leaq (%rsi),%rbp 3141 leaq 128(%rdi),%rdi 3142 sbbq %rax,%rax 3143 3144 movq -64(%rsi),%rdx 3145 movq %rax,16+8(%rsp) 3146 movq %rdi,24+8(%rsp) 3147 3148 3149 xorl %eax,%eax 3150 jmp L$sqrx8x_loop 3151 3152.p2align 5 3153L$sqrx8x_loop: 3154 movq %r8,%rbx 3155 mulxq 0(%rbp),%rax,%r8 3156 adcxq %rax,%rbx 3157 adoxq %r9,%r8 3158 3159 mulxq 8(%rbp),%rax,%r9 3160 adcxq %rax,%r8 3161 adoxq %r10,%r9 3162 3163 mulxq 16(%rbp),%rax,%r10 3164 adcxq %rax,%r9 3165 adoxq %r11,%r10 3166 3167 mulxq 24(%rbp),%rax,%r11 3168 adcxq %rax,%r10 3169 adoxq %r12,%r11 3170 3171.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3172 adcxq %rax,%r11 3173 adoxq %r13,%r12 3174 3175 mulxq 40(%rbp),%rax,%r13 3176 adcxq %rax,%r12 3177 adoxq %r14,%r13 3178 3179 mulxq 48(%rbp),%rax,%r14 3180 movq %rbx,(%rdi,%rcx,8) 3181 movl $0,%ebx 3182 adcxq %rax,%r13 3183 adoxq %r15,%r14 3184 3185.byte 0xc4,0x62,0xfb,0xf6,0xbd,0x38,0x00,0x00,0x00 3186 movq 8(%rsi,%rcx,8),%rdx 3187 adcxq %rax,%r14 3188 adoxq %rbx,%r15 3189 adcxq %rbx,%r15 3190 3191.byte 0x67 3192 incq %rcx 3193 jnz L$sqrx8x_loop 3194 3195 leaq 64(%rbp),%rbp 3196 movq $-8,%rcx 3197 cmpq 8+8(%rsp),%rbp 3198 je L$sqrx8x_break 3199 3200 subq 16+8(%rsp),%rbx 3201.byte 0x66 3202 movq -64(%rsi),%rdx 3203 adcxq 0(%rdi),%r8 3204 adcxq 8(%rdi),%r9 3205 adcq 16(%rdi),%r10 3206 adcq 24(%rdi),%r11 3207 adcq 32(%rdi),%r12 3208 adcq 40(%rdi),%r13 3209 adcq 48(%rdi),%r14 3210 adcq 56(%rdi),%r15 
3211 leaq 64(%rdi),%rdi 3212.byte 0x67 3213 sbbq %rax,%rax 3214 xorl %ebx,%ebx 3215 movq %rax,16+8(%rsp) 3216 jmp L$sqrx8x_loop 3217 3218.p2align 5 3219L$sqrx8x_break: 3220 xorq %rbp,%rbp 3221 subq 16+8(%rsp),%rbx 3222 adcxq %rbp,%r8 3223 movq 24+8(%rsp),%rcx 3224 adcxq %rbp,%r9 3225 movq 0(%rsi),%rdx 3226 adcq $0,%r10 3227 movq %r8,0(%rdi) 3228 adcq $0,%r11 3229 adcq $0,%r12 3230 adcq $0,%r13 3231 adcq $0,%r14 3232 adcq $0,%r15 3233 cmpq %rcx,%rdi 3234 je L$sqrx8x_outer_loop 3235 3236 movq %r9,8(%rdi) 3237 movq 8(%rcx),%r9 3238 movq %r10,16(%rdi) 3239 movq 16(%rcx),%r10 3240 movq %r11,24(%rdi) 3241 movq 24(%rcx),%r11 3242 movq %r12,32(%rdi) 3243 movq 32(%rcx),%r12 3244 movq %r13,40(%rdi) 3245 movq 40(%rcx),%r13 3246 movq %r14,48(%rdi) 3247 movq 48(%rcx),%r14 3248 movq %r15,56(%rdi) 3249 movq 56(%rcx),%r15 3250 movq %rcx,%rdi 3251 jmp L$sqrx8x_outer_loop 3252 3253.p2align 5 3254L$sqrx8x_outer_break: 3255 movq %r9,72(%rdi) 3256.byte 102,72,15,126,217 3257 movq %r10,80(%rdi) 3258 movq %r11,88(%rdi) 3259 movq %r12,96(%rdi) 3260 movq %r13,104(%rdi) 3261 movq %r14,112(%rdi) 3262 leaq 48+8(%rsp),%rdi 3263 movq (%rsi,%rcx,1),%rdx 3264 3265 movq 8(%rdi),%r11 3266 xorq %r10,%r10 3267 movq 0+8(%rsp),%r9 3268 adoxq %r11,%r11 3269 movq 16(%rdi),%r12 3270 movq 24(%rdi),%r13 3271 3272 3273.p2align 5 3274L$sqrx4x_shift_n_add: 3275 mulxq %rdx,%rax,%rbx 3276 adoxq %r12,%r12 3277 adcxq %r10,%rax 3278.byte 0x48,0x8b,0x94,0x0e,0x08,0x00,0x00,0x00 3279.byte 0x4c,0x8b,0x97,0x20,0x00,0x00,0x00 3280 adoxq %r13,%r13 3281 adcxq %r11,%rbx 3282 movq 40(%rdi),%r11 3283 movq %rax,0(%rdi) 3284 movq %rbx,8(%rdi) 3285 3286 mulxq %rdx,%rax,%rbx 3287 adoxq %r10,%r10 3288 adcxq %r12,%rax 3289 movq 16(%rsi,%rcx,1),%rdx 3290 movq 48(%rdi),%r12 3291 adoxq %r11,%r11 3292 adcxq %r13,%rbx 3293 movq 56(%rdi),%r13 3294 movq %rax,16(%rdi) 3295 movq %rbx,24(%rdi) 3296 3297 mulxq %rdx,%rax,%rbx 3298 adoxq %r12,%r12 3299 adcxq %r10,%rax 3300 movq 24(%rsi,%rcx,1),%rdx 3301 leaq 32(%rcx),%rcx 3302 movq 
64(%rdi),%r10 3303 adoxq %r13,%r13 3304 adcxq %r11,%rbx 3305 movq 72(%rdi),%r11 3306 movq %rax,32(%rdi) 3307 movq %rbx,40(%rdi) 3308 3309 mulxq %rdx,%rax,%rbx 3310 adoxq %r10,%r10 3311 adcxq %r12,%rax 3312 jrcxz L$sqrx4x_shift_n_add_break 3313.byte 0x48,0x8b,0x94,0x0e,0x00,0x00,0x00,0x00 3314 adoxq %r11,%r11 3315 adcxq %r13,%rbx 3316 movq 80(%rdi),%r12 3317 movq 88(%rdi),%r13 3318 movq %rax,48(%rdi) 3319 movq %rbx,56(%rdi) 3320 leaq 64(%rdi),%rdi 3321 nop 3322 jmp L$sqrx4x_shift_n_add 3323 3324.p2align 5 3325L$sqrx4x_shift_n_add_break: 3326 adcxq %r13,%rbx 3327 movq %rax,48(%rdi) 3328 movq %rbx,56(%rdi) 3329 leaq 64(%rdi),%rdi 3330.byte 102,72,15,126,213 3331__bn_sqrx8x_reduction: 3332 xorl %eax,%eax 3333 movq 32+8(%rsp),%rbx 3334 movq 48+8(%rsp),%rdx 3335 leaq -64(%rbp,%r9,1),%rcx 3336 3337 movq %rcx,0+8(%rsp) 3338 movq %rdi,8+8(%rsp) 3339 3340 leaq 48+8(%rsp),%rdi 3341 jmp L$sqrx8x_reduction_loop 3342 3343.p2align 5 3344L$sqrx8x_reduction_loop: 3345 movq 8(%rdi),%r9 3346 movq 16(%rdi),%r10 3347 movq 24(%rdi),%r11 3348 movq 32(%rdi),%r12 3349 movq %rdx,%r8 3350 imulq %rbx,%rdx 3351 movq 40(%rdi),%r13 3352 movq 48(%rdi),%r14 3353 movq 56(%rdi),%r15 3354 movq %rax,24+8(%rsp) 3355 3356 leaq 64(%rdi),%rdi 3357 xorq %rsi,%rsi 3358 movq $-8,%rcx 3359 jmp L$sqrx8x_reduce 3360 3361.p2align 5 3362L$sqrx8x_reduce: 3363 movq %r8,%rbx 3364 mulxq 0(%rbp),%rax,%r8 3365 adcxq %rbx,%rax 3366 adoxq %r9,%r8 3367 3368 mulxq 8(%rbp),%rbx,%r9 3369 adcxq %rbx,%r8 3370 adoxq %r10,%r9 3371 3372 mulxq 16(%rbp),%rbx,%r10 3373 adcxq %rbx,%r9 3374 adoxq %r11,%r10 3375 3376 mulxq 24(%rbp),%rbx,%r11 3377 adcxq %rbx,%r10 3378 adoxq %r12,%r11 3379 3380.byte 0xc4,0x62,0xe3,0xf6,0xa5,0x20,0x00,0x00,0x00 3381 movq %rdx,%rax 3382 movq %r8,%rdx 3383 adcxq %rbx,%r11 3384 adoxq %r13,%r12 3385 3386 mulxq 32+8(%rsp),%rbx,%rdx 3387 movq %rax,%rdx 3388 movq %rax,64+48+8(%rsp,%rcx,8) 3389 3390 mulxq 40(%rbp),%rax,%r13 3391 adcxq %rax,%r12 3392 adoxq %r14,%r13 3393 3394 mulxq 48(%rbp),%rax,%r14 3395 adcxq 
%rax,%r13 3396 adoxq %r15,%r14 3397 3398 mulxq 56(%rbp),%rax,%r15 3399 movq %rbx,%rdx 3400 adcxq %rax,%r14 3401 adoxq %rsi,%r15 3402 adcxq %rsi,%r15 3403 3404.byte 0x67,0x67,0x67 3405 incq %rcx 3406 jnz L$sqrx8x_reduce 3407 3408 movq %rsi,%rax 3409 cmpq 0+8(%rsp),%rbp 3410 jae L$sqrx8x_no_tail 3411 3412 movq 48+8(%rsp),%rdx 3413 addq 0(%rdi),%r8 3414 leaq 64(%rbp),%rbp 3415 movq $-8,%rcx 3416 adcxq 8(%rdi),%r9 3417 adcxq 16(%rdi),%r10 3418 adcq 24(%rdi),%r11 3419 adcq 32(%rdi),%r12 3420 adcq 40(%rdi),%r13 3421 adcq 48(%rdi),%r14 3422 adcq 56(%rdi),%r15 3423 leaq 64(%rdi),%rdi 3424 sbbq %rax,%rax 3425 3426 xorq %rsi,%rsi 3427 movq %rax,16+8(%rsp) 3428 jmp L$sqrx8x_tail 3429 3430.p2align 5 3431L$sqrx8x_tail: 3432 movq %r8,%rbx 3433 mulxq 0(%rbp),%rax,%r8 3434 adcxq %rax,%rbx 3435 adoxq %r9,%r8 3436 3437 mulxq 8(%rbp),%rax,%r9 3438 adcxq %rax,%r8 3439 adoxq %r10,%r9 3440 3441 mulxq 16(%rbp),%rax,%r10 3442 adcxq %rax,%r9 3443 adoxq %r11,%r10 3444 3445 mulxq 24(%rbp),%rax,%r11 3446 adcxq %rax,%r10 3447 adoxq %r12,%r11 3448 3449.byte 0xc4,0x62,0xfb,0xf6,0xa5,0x20,0x00,0x00,0x00 3450 adcxq %rax,%r11 3451 adoxq %r13,%r12 3452 3453 mulxq 40(%rbp),%rax,%r13 3454 adcxq %rax,%r12 3455 adoxq %r14,%r13 3456 3457 mulxq 48(%rbp),%rax,%r14 3458 adcxq %rax,%r13 3459 adoxq %r15,%r14 3460 3461 mulxq 56(%rbp),%rax,%r15 3462 movq 72+48+8(%rsp,%rcx,8),%rdx 3463 adcxq %rax,%r14 3464 adoxq %rsi,%r15 3465 movq %rbx,(%rdi,%rcx,8) 3466 movq %r8,%rbx 3467 adcxq %rsi,%r15 3468 3469 incq %rcx 3470 jnz L$sqrx8x_tail 3471 3472 cmpq 0+8(%rsp),%rbp 3473 jae L$sqrx8x_tail_done 3474 3475 subq 16+8(%rsp),%rsi 3476 movq 48+8(%rsp),%rdx 3477 leaq 64(%rbp),%rbp 3478 adcq 0(%rdi),%r8 3479 adcq 8(%rdi),%r9 3480 adcq 16(%rdi),%r10 3481 adcq 24(%rdi),%r11 3482 adcq 32(%rdi),%r12 3483 adcq 40(%rdi),%r13 3484 adcq 48(%rdi),%r14 3485 adcq 56(%rdi),%r15 3486 leaq 64(%rdi),%rdi 3487 sbbq %rax,%rax 3488 subq $8,%rcx 3489 3490 xorq %rsi,%rsi 3491 movq %rax,16+8(%rsp) 3492 jmp L$sqrx8x_tail 3493 3494.p2align 5 
# Epilogue of the MULX/ADX Montgomery reduction (__bn_sqrx8x_reduction):
# propagate the final carries, restore the saved pointers from xmm
# registers, store the reduced window and loop if more remains.
L$sqrx8x_tail_done:
	xorq	%rax,%rax
	addq	24+8(%rsp),%r8		# fold in the carry word parked earlier
	adcq	$0,%r9
	adcq	$0,%r10
	adcq	$0,%r11
	adcq	$0,%r12
	adcq	$0,%r13
	adcq	$0,%r14
	adcq	$0,%r15
	adcq	$0,%rax			# %rax = propagated top carry

	subq	16+8(%rsp),%rsi		# re-inject carry stashed by the tail loop
L$sqrx8x_no_tail:
	adcq	0(%rdi),%r8		# final accumulation into the result window
.byte	102,72,15,126,217		# movq %xmm3,%rcx (restore value saved in xmm3)
	adcq	8(%rdi),%r9
	movq	56(%rbp),%rsi		# top modulus word
.byte	102,72,15,126,213		# movq %xmm2,%rbp (restore modulus pointer)
	adcq	16(%rdi),%r10
	adcq	24(%rdi),%r11
	adcq	32(%rdi),%r12
	adcq	40(%rdi),%r13
	adcq	48(%rdi),%r14
	adcq	56(%rdi),%r15
	adcq	$0,%rax

	movq	32+8(%rsp),%rbx		# reload n0 for the next pass
	movq	64(%rdi,%rcx,1),%rdx

	movq	%r8,0(%rdi)		# store the reduced 8-word window
	leaq	64(%rdi),%r8
	movq	%r9,8(%rdi)
	movq	%r10,16(%rdi)
	movq	%r11,24(%rdi)
	movq	%r12,32(%rdi)
	movq	%r13,40(%rdi)
	movq	%r14,48(%rdi)
	movq	%r15,56(%rdi)

	leaq	64(%rdi,%rcx,1),%rdi
	cmpq	8+8(%rsp),%r8
	jb	L$sqrx8x_reduction_loop	# more windows to reduce
	.byte	0xf3,0xc3		# rep ret

# ---------------------------------------------------------------------
# __bn_postx4x_internal: BMI1 variant of the final conditional
# subtraction.  andnq computes ~n & mask directly (dst = ~src2 & src1),
# then the adcq chain adds it to t with a running carry, yielding t - n
# when the mask in %rax is all-ones and t unchanged when it is zero.
# Constant-time: both cases run the identical instruction stream.
# ---------------------------------------------------------------------
.p2align	5

__bn_postx4x_internal:
	movq	0(%rbp),%r12
	movq	%rcx,%r10
	movq	%rcx,%r9
	negq	%rax			# turn borrow flag into carry seed
	sarq	$3+2,%rcx		# %rcx = signed count of 32-byte groups

.byte	102,72,15,126,202		# movq %xmm1,%rdx (restore output pointer)
.byte	102,72,15,126,206		# movq %xmm1,%rsi
	decq	%r12			# defer first load: already in %r12
	movq	8(%rbp),%r13
	xorq	%r8,%r8
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
	jmp	L$sqrx4x_sub_entry

.p2align	4
L$sqrx4x_sub:
	movq	0(%rbp),%r12		# next four modulus words
	movq	8(%rbp),%r13
	movq	16(%rbp),%r14
	movq	24(%rbp),%r15
L$sqrx4x_sub_entry:
	andnq	%rax,%r12,%r12		# %r12 = ~n[i] & mask
	leaq	32(%rbp),%rbp
	andnq	%rax,%r13,%r13
	andnq	%rax,%r14,%r14
	andnq	%rax,%r15,%r15

	negq	%r8			# restore carry from previous group
	adcq	0(%rdi),%r12		# t + (~n & mask) with running carry
	adcq	8(%rdi),%r13
	adcq	16(%rdi),%r14
	adcq	24(%rdi),%r15
	movq	%r12,0(%rdx)
	leaq	32(%rdi),%rdi
	movq	%r13,8(%rdx)
	sbbq	%r8,%r8			# stash carry for the next group
	movq	%r14,16(%rdx)
	movq	%r15,24(%rdx)
	leaq	32(%rdx),%rdx

	incq	%rcx			# %rcx counts up from a negative value
	jnz	L$sqrx4x_sub

	negq	%r9			# restore positive word count

	.byte	0xf3,0xc3		# rep ret


.globl	_bn_scatter5
.private_extern
_bn_scatter5 3596 3597.p2align 4 3598_bn_scatter5: 3599 3600 cmpl $0,%esi 3601 jz L$scatter_epilogue 3602 leaq (%rdx,%rcx,8),%rdx 3603L$scatter: 3604 movq (%rdi),%rax 3605 leaq 8(%rdi),%rdi 3606 movq %rax,(%rdx) 3607 leaq 256(%rdx),%rdx 3608 subl $1,%esi 3609 jnz L$scatter 3610L$scatter_epilogue: 3611 .byte 0xf3,0xc3 3612 3613 3614 3615.globl _bn_gather5 3616.private_extern _bn_gather5 3617 3618.p2align 5 3619_bn_gather5: 3620 3621L$SEH_begin_bn_gather5: 3622 3623.byte 0x4c,0x8d,0x14,0x24 3624 3625.byte 0x48,0x81,0xec,0x08,0x01,0x00,0x00 3626 leaq L$inc(%rip),%rax 3627 andq $-16,%rsp 3628 3629 movd %ecx,%xmm5 3630 movdqa 0(%rax),%xmm0 3631 movdqa 16(%rax),%xmm1 3632 leaq 128(%rdx),%r11 3633 leaq 128(%rsp),%rax 3634 3635 pshufd $0,%xmm5,%xmm5 3636 movdqa %xmm1,%xmm4 3637 movdqa %xmm1,%xmm2 3638 paddd %xmm0,%xmm1 3639 pcmpeqd %xmm5,%xmm0 3640 movdqa %xmm4,%xmm3 3641 3642 paddd %xmm1,%xmm2 3643 pcmpeqd %xmm5,%xmm1 3644 movdqa %xmm0,-128(%rax) 3645 movdqa %xmm4,%xmm0 3646 3647 paddd %xmm2,%xmm3 3648 pcmpeqd %xmm5,%xmm2 3649 movdqa %xmm1,-112(%rax) 3650 movdqa %xmm4,%xmm1 3651 3652 paddd %xmm3,%xmm0 3653 pcmpeqd %xmm5,%xmm3 3654 movdqa %xmm2,-96(%rax) 3655 movdqa %xmm4,%xmm2 3656 paddd %xmm0,%xmm1 3657 pcmpeqd %xmm5,%xmm0 3658 movdqa %xmm3,-80(%rax) 3659 movdqa %xmm4,%xmm3 3660 3661 paddd %xmm1,%xmm2 3662 pcmpeqd %xmm5,%xmm1 3663 movdqa %xmm0,-64(%rax) 3664 movdqa %xmm4,%xmm0 3665 3666 paddd %xmm2,%xmm3 3667 pcmpeqd %xmm5,%xmm2 3668 movdqa %xmm1,-48(%rax) 3669 movdqa %xmm4,%xmm1 3670 3671 paddd %xmm3,%xmm0 3672 pcmpeqd %xmm5,%xmm3 3673 movdqa %xmm2,-32(%rax) 3674 movdqa %xmm4,%xmm2 3675 paddd %xmm0,%xmm1 3676 pcmpeqd %xmm5,%xmm0 3677 movdqa %xmm3,-16(%rax) 3678 movdqa %xmm4,%xmm3 3679 3680 paddd %xmm1,%xmm2 3681 pcmpeqd %xmm5,%xmm1 3682 movdqa %xmm0,0(%rax) 3683 movdqa %xmm4,%xmm0 3684 3685 paddd %xmm2,%xmm3 3686 pcmpeqd %xmm5,%xmm2 3687 movdqa %xmm1,16(%rax) 3688 movdqa %xmm4,%xmm1 3689 3690 paddd %xmm3,%xmm0 3691 pcmpeqd %xmm5,%xmm3 3692 movdqa %xmm2,32(%rax) 3693 
movdqa %xmm4,%xmm2 3694 paddd %xmm0,%xmm1 3695 pcmpeqd %xmm5,%xmm0 3696 movdqa %xmm3,48(%rax) 3697 movdqa %xmm4,%xmm3 3698 3699 paddd %xmm1,%xmm2 3700 pcmpeqd %xmm5,%xmm1 3701 movdqa %xmm0,64(%rax) 3702 movdqa %xmm4,%xmm0 3703 3704 paddd %xmm2,%xmm3 3705 pcmpeqd %xmm5,%xmm2 3706 movdqa %xmm1,80(%rax) 3707 movdqa %xmm4,%xmm1 3708 3709 paddd %xmm3,%xmm0 3710 pcmpeqd %xmm5,%xmm3 3711 movdqa %xmm2,96(%rax) 3712 movdqa %xmm4,%xmm2 3713 movdqa %xmm3,112(%rax) 3714 jmp L$gather 3715 3716.p2align 5 3717L$gather: 3718 pxor %xmm4,%xmm4 3719 pxor %xmm5,%xmm5 3720 movdqa -128(%r11),%xmm0 3721 movdqa -112(%r11),%xmm1 3722 movdqa -96(%r11),%xmm2 3723 pand -128(%rax),%xmm0 3724 movdqa -80(%r11),%xmm3 3725 pand -112(%rax),%xmm1 3726 por %xmm0,%xmm4 3727 pand -96(%rax),%xmm2 3728 por %xmm1,%xmm5 3729 pand -80(%rax),%xmm3 3730 por %xmm2,%xmm4 3731 por %xmm3,%xmm5 3732 movdqa -64(%r11),%xmm0 3733 movdqa -48(%r11),%xmm1 3734 movdqa -32(%r11),%xmm2 3735 pand -64(%rax),%xmm0 3736 movdqa -16(%r11),%xmm3 3737 pand -48(%rax),%xmm1 3738 por %xmm0,%xmm4 3739 pand -32(%rax),%xmm2 3740 por %xmm1,%xmm5 3741 pand -16(%rax),%xmm3 3742 por %xmm2,%xmm4 3743 por %xmm3,%xmm5 3744 movdqa 0(%r11),%xmm0 3745 movdqa 16(%r11),%xmm1 3746 movdqa 32(%r11),%xmm2 3747 pand 0(%rax),%xmm0 3748 movdqa 48(%r11),%xmm3 3749 pand 16(%rax),%xmm1 3750 por %xmm0,%xmm4 3751 pand 32(%rax),%xmm2 3752 por %xmm1,%xmm5 3753 pand 48(%rax),%xmm3 3754 por %xmm2,%xmm4 3755 por %xmm3,%xmm5 3756 movdqa 64(%r11),%xmm0 3757 movdqa 80(%r11),%xmm1 3758 movdqa 96(%r11),%xmm2 3759 pand 64(%rax),%xmm0 3760 movdqa 112(%r11),%xmm3 3761 pand 80(%rax),%xmm1 3762 por %xmm0,%xmm4 3763 pand 96(%rax),%xmm2 3764 por %xmm1,%xmm5 3765 pand 112(%rax),%xmm3 3766 por %xmm2,%xmm4 3767 por %xmm3,%xmm5 3768 por %xmm5,%xmm4 3769 leaq 256(%r11),%r11 3770 pshufd $0x4e,%xmm4,%xmm0 3771 por %xmm4,%xmm0 3772 movq %xmm0,(%rdi) 3773 leaq 8(%rdi),%rdi 3774 subl $1,%esi 3775 jnz L$gather 3776 3777 leaq (%r10),%rsp 3778 3779 .byte 0xf3,0xc3 3780L$SEH_end_bn_gather5: 

/* Seed/increment vectors for the bn_gather5 mask generator:
   {0,0,1,1} is the first dword index pair, {2,2,2,2} advances
   both lanes by 2 each step (covers entries 0..31). */
.p2align	6
L$inc:
.long	0,0, 1,1
.long	2,2, 2,2
/* ASCII: "Montgomery Multiplication with scatter/gather for x86_64,
   CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	77,111,110,116,103,111,109,101,114,121,32,77,117,108,116,105,112,108,105,99,97,116,105,111,110,32,119,105,116,104,32,115,99,97,116,116,101,114,47,103,97,116,104,101,114,32,102,111,114,32,120,56,54,95,54,52,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
#endif