# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# Reading notes for the generated code below:
#   * Syntax is AT&T: source operand first, destination operand last.
#   * Every sequence .byte 102,[REX,]15,56,0,M is a hand-encoded SSSE3
#     pshufb (66 [REX] 0F 38 00 /r), and .byte 102,[REX,]15,58,15,M,I is
#     a hand-encoded palignr (66 [REX] 0F 3A 0F /r ib). The decoded form
#     is given in the trailing comment on each such line.
#   * The sequence .byte 0xf3,0xc3 is the two-byte encoding of rep ret.
#   * The ASCII banner at the end of _vpaes_consts reads: Vector Permutation
#     AES for x86_64/SSSE3, Mike Hamburg (Stanford University).

# When the compiler reports memory_sanitizer and OPENSSL_NO_ASM is not yet
# defined, OPENSSL_NO_ASM is defined here, which disables everything inside
# the conditional that follows.
#if defined(__has_feature)
#if __has_feature(memory_sanitizer) && !defined(OPENSSL_NO_ASM)
#define OPENSSL_NO_ASM
#endif
#endif

#if defined(__x86_64__) && !defined(OPENSSL_NO_ASM)
.text

# _vpaes_encrypt_core: encrypt one 16-byte block held in a register.
#
# Inputs:   xmm0       = block to encrypt
#           rdx        = key schedule: 16-byte round keys starting at offset
#                        0 and a 32-bit loop count at offset 240 (stored by
#                        GFp_vpaes_set_encrypt_key as bits/32 + 5)
#           xmm9-xmm15 = constants loaded by _vpaes_preheat
# Output:   xmm0       = result block
# Clobbers: xmm1-xmm5, rax, r9, r10, r11, flags
#
# Register roles inside the routine:
#   r9  = pointer to the next round key (advanced by 16 per round)
#   r10 = address of .Lk_mc_backward, used as base for neighbouring tables
#   r11 = row offset cycling 16,32,48,0 (add 16, then and 0x30)
#   rax = remaining loop count
.type _vpaes_encrypt_core,@function
.align 16
_vpaes_encrypt_core:
.cfi_startproc
	movq %rdx,%r9
	movq $16,%r11
	movl 240(%rdx),%eax  # Loop count; the 32-bit write zero-extends into rax
	movdqa %xmm9,%xmm1
	movdqa .Lk_ipt(%rip),%xmm2
	pandn %xmm0,%xmm1  # xmm1 = xmm0 with the bits selected by xmm9 cleared
	movdqu (%r9),%xmm5  # First round key
	psrld $4,%xmm1  # xmm1 = high nibble of every byte (xmm9 is the 0x0F mask)
	pand %xmm9,%xmm0  # xmm0 = low nibble of every byte
.byte 102,15,56,0,208  # pshufb %xmm0,%xmm2
	movdqa .Lk_ipt+16(%rip),%xmm0
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	pxor %xmm5,%xmm2
	addq $16,%r9  # Last flag-setting instruction before the first jnz below
	pxor %xmm2,%xmm0
	leaq .Lk_mc_backward(%rip),%r10  # leaq leaves the flags untouched
	jmp .Lenc_entry

.align 16
.Lenc_loop:
# Loop body. On entry xmm2 and xmm3 hold the pshufb indices produced by the
# .Lenc_entry section and xmm5 holds the current round key.
	movdqa %xmm13,%xmm4  # xmm13 = .Lk_sb1 (from _vpaes_preheat)
	movdqa %xmm12,%xmm0  # xmm12 = .Lk_sb1+16
.byte 102,15,56,0,226  # pshufb %xmm2,%xmm4
.byte 102,15,56,0,195  # pshufb %xmm3,%xmm0
	pxor %xmm5,%xmm4  # Mix in the round key
	movdqa %xmm15,%xmm5  # xmm15 = .Lk_sb2
	pxor %xmm4,%xmm0
	movdqa -64(%r11,%r10,1),%xmm1  # Row r11 of .Lk_mc_forward (64 bytes below r10)
.byte 102,15,56,0,234  # pshufb %xmm2,%xmm5
	movdqa (%r11,%r10,1),%xmm4  # Row r11 of .Lk_mc_backward
	movdqa %xmm14,%xmm2  # xmm14 = .Lk_sb2+16
.byte 102,15,56,0,211  # pshufb %xmm3,%xmm2
	movdqa %xmm0,%xmm3
	pxor %xmm5,%xmm2
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	addq $16,%r9  # Advance to the next round key
	pxor %xmm2,%xmm0
.byte 102,15,56,0,220  # pshufb %xmm4,%xmm3
	addq $16,%r11
	pxor %xmm0,%xmm3
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	andq $0x30,%r11  # Keep r11 within 0,16,32,48
	subq $1,%rax  # Sets ZF for the jnz at the end of the entry section
	pxor %xmm3,%xmm0

.Lenc_entry:
# Executed once per round, including the first. No instruction between here
# and the jnz modifies the flags, so the branch tests the subq above (or, on
# the very first pass, the addq $16,%r9 in the prologue).
	movdqa %xmm9,%xmm1
	movdqa %xmm11,%xmm5  # xmm11 = .Lk_inv+16
	pandn %xmm0,%xmm1
	psrld $4,%xmm1  # xmm1 = high nibbles
	pand %xmm9,%xmm0  # xmm0 = low nibbles
.byte 102,15,56,0,232  # pshufb %xmm0,%xmm5
	movdqa %xmm10,%xmm3  # xmm10 = .Lk_inv
	pxor %xmm1,%xmm0
.byte 102,15,56,0,217  # pshufb %xmm1,%xmm3
	movdqa %xmm10,%xmm4
	pxor %xmm5,%xmm3
.byte 102,15,56,0,224  # pshufb %xmm0,%xmm4
	movdqa %xmm10,%xmm2
	pxor %xmm5,%xmm4
.byte 102,15,56,0,211  # pshufb %xmm3,%xmm2
	movdqa %xmm10,%xmm3
	pxor %xmm0,%xmm2
.byte 102,15,56,0,220  # pshufb %xmm4,%xmm3
	movdqu (%r9),%xmm5  # Next round key
	pxor %xmm1,%xmm3
	jnz .Lenc_loop

# Final round: uses the table at .Lk_sbo and a row of .Lk_sr instead of the
# tables used in the loop body.
	movdqa -96(%r10),%xmm4  # .Lk_sbo (96 bytes below .Lk_mc_backward)
	movdqa -80(%r10),%xmm0  # .Lk_sbo+16
.byte 102,15,56,0,226  # pshufb %xmm2,%xmm4
	pxor %xmm5,%xmm4
.byte 102,15,56,0,195  # pshufb %xmm3,%xmm0
	movdqa 64(%r11,%r10,1),%xmm1  # Row r11 of .Lk_sr (64 bytes above r10)
	pxor %xmm4,%xmm0
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_encrypt_core,.-_vpaes_encrypt_core

# _vpaes_encrypt_core_2x: encrypt two independent 16-byte blocks with the
# same key schedule. The instruction stream is that of _vpaes_encrypt_core
# with every step issued twice, once per block.
#
# Inputs:   xmm0, xmm6  = the two blocks to encrypt
#           rdx         = key schedule (same layout as _vpaes_encrypt_core)
#           xmm9, xmm10 = constants loaded by _vpaes_preheat
# Outputs:  xmm0, xmm6  = the two result blocks
# Clobbers: xmm1-xmm5, xmm7, xmm8, xmm11-xmm13, rax, r9, r10, r11, flags
#
# Register pairing (first block / second block):
#   xmm0/xmm6, xmm1/xmm7, xmm2/xmm8, xmm3/xmm11, xmm4/xmm12, xmm5/xmm13
# Because xmm11-xmm13 are used as scratch, the tables that
# _vpaes_encrypt_core takes from xmm11-xmm15 are re-read from memory here,
# and the preheat values in xmm11-xmm13 are no longer valid on return.
.type _vpaes_encrypt_core_2x,@function
.align 16
_vpaes_encrypt_core_2x:
.cfi_startproc
	movq %rdx,%r9
	movq $16,%r11
	movl 240(%rdx),%eax  # Loop count
	movdqa %xmm9,%xmm1
	movdqa %xmm9,%xmm7
	movdqa .Lk_ipt(%rip),%xmm2
	movdqa %xmm2,%xmm8
	pandn %xmm0,%xmm1
	pandn %xmm6,%xmm7
	movdqu (%r9),%xmm5  # First round key, shared by both blocks

	psrld $4,%xmm1  # High nibbles, first block
	psrld $4,%xmm7  # High nibbles, second block
	pand %xmm9,%xmm0  # Low nibbles, first block
	pand %xmm9,%xmm6  # Low nibbles, second block
.byte 102,15,56,0,208  # pshufb %xmm0,%xmm2
.byte 102,68,15,56,0,198  # pshufb %xmm6,%xmm8
	movdqa .Lk_ipt+16(%rip),%xmm0
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
.byte 102,15,56,0,247  # pshufb %xmm7,%xmm6
	pxor %xmm5,%xmm2
	pxor %xmm5,%xmm8
	addq $16,%r9  # Last flag-setting instruction before the first jnz below
	pxor %xmm2,%xmm0
	pxor %xmm8,%xmm6
	leaq .Lk_mc_backward(%rip),%r10
	jmp .Lenc2x_entry

.align 16
.Lenc2x_loop:
# Loop body for both blocks. Indices arrive in xmm2/xmm3 (first block) and
# xmm8/xmm11 (second block); xmm5 holds the current round key.
	movdqa .Lk_sb1(%rip),%xmm4
	movdqa .Lk_sb1+16(%rip),%xmm0
	movdqa %xmm4,%xmm12
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,226  # pshufb %xmm2,%xmm4
.byte 102,69,15,56,0,224  # pshufb %xmm8,%xmm12
.byte 102,15,56,0,195  # pshufb %xmm3,%xmm0
.byte 102,65,15,56,0,243  # pshufb %xmm11,%xmm6
	pxor %xmm5,%xmm4  # Mix in the round key, first block
	pxor %xmm5,%xmm12  # Mix in the round key, second block
	movdqa .Lk_sb2(%rip),%xmm5
	movdqa %xmm5,%xmm13
	pxor %xmm4,%xmm0
	pxor %xmm12,%xmm6
	movdqa -64(%r11,%r10,1),%xmm1  # Row r11 of .Lk_mc_forward, shared

.byte 102,15,56,0,234  # pshufb %xmm2,%xmm5
.byte 102,69,15,56,0,232  # pshufb %xmm8,%xmm13
	movdqa (%r11,%r10,1),%xmm4  # Row r11 of .Lk_mc_backward, shared

	movdqa .Lk_sb2+16(%rip),%xmm2
	movdqa %xmm2,%xmm8
.byte 102,15,56,0,211  # pshufb %xmm3,%xmm2
.byte 102,69,15,56,0,195  # pshufb %xmm11,%xmm8
	movdqa %xmm0,%xmm3
	movdqa %xmm6,%xmm11
	pxor %xmm5,%xmm2
	pxor %xmm13,%xmm8
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
.byte 102,15,56,0,241  # pshufb %xmm1,%xmm6
	addq $16,%r9  # Advance to the next round key
	pxor %xmm2,%xmm0
	pxor %xmm8,%xmm6
.byte 102,15,56,0,220  # pshufb %xmm4,%xmm3
.byte 102,68,15,56,0,220  # pshufb %xmm4,%xmm11
	addq $16,%r11
	pxor %xmm0,%xmm3
	pxor %xmm6,%xmm11
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
.byte 102,15,56,0,241  # pshufb %xmm1,%xmm6
	andq $0x30,%r11  # Keep r11 within 0,16,32,48
	subq $1,%rax  # Sets ZF for the jnz at the end of the entry section
	pxor %xmm3,%xmm0
	pxor %xmm11,%xmm6

.Lenc2x_entry:
# Executed once per round for both blocks. As in the single-block routine,
# nothing between here and the jnz modifies the flags.
	movdqa %xmm9,%xmm1
	movdqa %xmm9,%xmm7
	movdqa .Lk_inv+16(%rip),%xmm5
	movdqa %xmm5,%xmm13
	pandn %xmm0,%xmm1
	pandn %xmm6,%xmm7
	psrld $4,%xmm1  # High nibbles, first block
	psrld $4,%xmm7  # High nibbles, second block
	pand %xmm9,%xmm0  # Low nibbles, first block
	pand %xmm9,%xmm6  # Low nibbles, second block
.byte 102,15,56,0,232  # pshufb %xmm0,%xmm5
.byte 102,68,15,56,0,238  # pshufb %xmm6,%xmm13
	movdqa %xmm10,%xmm3
	movdqa %xmm10,%xmm11
	pxor %xmm1,%xmm0
	pxor %xmm7,%xmm6
.byte 102,15,56,0,217  # pshufb %xmm1,%xmm3
.byte 102,68,15,56,0,223  # pshufb %xmm7,%xmm11
	movdqa %xmm10,%xmm4
	movdqa %xmm10,%xmm12
	pxor %xmm5,%xmm3
	pxor %xmm13,%xmm11
.byte 102,15,56,0,224  # pshufb %xmm0,%xmm4
.byte 102,68,15,56,0,230  # pshufb %xmm6,%xmm12
	movdqa %xmm10,%xmm2
	movdqa %xmm10,%xmm8
	pxor %xmm5,%xmm4
	pxor %xmm13,%xmm12
.byte 102,15,56,0,211  # pshufb %xmm3,%xmm2
.byte 102,69,15,56,0,195  # pshufb %xmm11,%xmm8
	movdqa %xmm10,%xmm3
	movdqa %xmm10,%xmm11
	pxor %xmm0,%xmm2
	pxor %xmm6,%xmm8
.byte 102,15,56,0,220  # pshufb %xmm4,%xmm3
.byte 102,69,15,56,0,220  # pshufb %xmm12,%xmm11
	movdqu (%r9),%xmm5  # Next round key, shared by both blocks

	pxor %xmm1,%xmm3
	pxor %xmm7,%xmm11
	jnz .Lenc2x_loop

# Final round for both blocks.
	movdqa -96(%r10),%xmm4  # .Lk_sbo
	movdqa -80(%r10),%xmm0  # .Lk_sbo+16
	movdqa %xmm4,%xmm12
	movdqa %xmm0,%xmm6
.byte 102,15,56,0,226  # pshufb %xmm2,%xmm4
.byte 102,69,15,56,0,224  # pshufb %xmm8,%xmm12
	pxor %xmm5,%xmm4
	pxor %xmm5,%xmm12
.byte 102,15,56,0,195  # pshufb %xmm3,%xmm0
.byte 102,65,15,56,0,243  # pshufb %xmm11,%xmm6
	movdqa 64(%r11,%r10,1),%xmm1  # Row r11 of .Lk_sr

	pxor %xmm4,%xmm0
	pxor %xmm12,%xmm6
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
.byte 102,15,56,0,241  # pshufb %xmm1,%xmm6
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_encrypt_core_2x,.-_vpaes_encrypt_core_2x

# _vpaes_schedule_core: expand a user key into the round-key array.
#
# Inputs:   rdi = user key bytes (16 bytes are read; 32 when esi > 192)
#           esi = key size in bits; only the comparison against 192 matters
#           rdx = output array of 16-byte entries
#           r8  = initial row offset into .Lk_sr (the visible caller passes
#                 0x30)
# Effects:  Writes 11 entries (176 bytes) on the .Lschedule_128 path and 15
#           entries (240 bytes) on the .Lschedule_256 path, leaves rdx
#           pointing at the last entry written, and zeroes xmm0-xmm7 before
#           returning.
# Clobbers: xmm0-xmm15 (via _vpaes_preheat and the helpers), rsi, rdx, r8,
#           r10, r11, flags
#
# Any esi value not above 192 takes the .Lschedule_128 path; there is no
# separate 192-bit schedule in this file.
.type _vpaes_schedule_core,@function
.align 16
_vpaes_schedule_core:
.cfi_startproc
	call _vpaes_preheat
	movdqa .Lk_rcon(%rip),%xmm8  # xmm8 is rotated by each _vpaes_schedule_round
	movdqu (%rdi),%xmm0  # First 16 key bytes; unaligned load

# NOTE(review): the xmm3 copy below is overwritten by the helpers before it
# is read anywhere in this file; presumably a leftover of the generator.
	movdqa %xmm0,%xmm3
	leaq .Lk_ipt(%rip),%r11  # Table pair for _vpaes_schedule_transform
	call _vpaes_schedule_transform
	movdqa %xmm0,%xmm7  # xmm7 carries the previous key block between rounds

	leaq .Lk_sr(%rip),%r10  # Base for the (%r8,%r10,1) row lookups

	movdqu %xmm0,(%rdx)  # Entry 0 of the output array

.Lschedule_go:
	cmpl $192,%esi
	ja .Lschedule_256  # Unsigned compare: only values above 192 branch

# Path for esi <= 192: ten rounds, each but the last followed by a mangle
# and store.
.Lschedule_128:
	movl $10,%esi  # The 32-bit write zero-extends, so decq %rsi below is safe

.Loop_schedule_128:
	call _vpaes_schedule_round
	decq %rsi
	jz .Lschedule_mangle_last
	call _vpaes_schedule_mangle  # Advances rdx by 16 and stores one entry
	jmp .Loop_schedule_128

# Path for esi > 192: reads the second 16 key bytes and alternates between
# a full round and a low round, keeping the two halves in xmm7 and xmm6.
.align 16
.Lschedule_256:
	movdqu 16(%rdi),%xmm0  # Second 16 key bytes
	call _vpaes_schedule_transform  # r11 still points at .Lk_ipt
	movl $7,%esi

.Loop_schedule_256:
	call _vpaes_schedule_mangle
	movdqa %xmm0,%xmm6  # Save this half for the low round below

	call _vpaes_schedule_round
	decq %rsi
	jz .Lschedule_mangle_last
	call _vpaes_schedule_mangle

	pshufd $0xFF,%xmm0,%xmm0  # Broadcast the top dword of xmm0 to all lanes
	movdqa %xmm7,%xmm5  # Park xmm7; the low round does not touch xmm5
	movdqa %xmm6,%xmm7
	call _vpaes_schedule_low_round
	movdqa %xmm5,%xmm7  # Restore xmm7

	jmp .Loop_schedule_256

# Last entry: permuted with a row of .Lk_sr, xored with .Lk_s63 and passed
# through the .Lk_opt table pair before the final store.
.align 16
.Lschedule_mangle_last:

# NOTE(review): r11 is reloaded with .Lk_opt below before anything reads it,
# so this .Lk_deskew load has no effect in the code visible here.
	leaq .Lk_deskew(%rip),%r11

	movdqa (%r8,%r10,1),%xmm1  # Row r8 of .Lk_sr
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	leaq .Lk_opt(%rip),%r11
	addq $32,%rdx

.Lschedule_mangle_last_dec:
	addq $-16,%rdx  # Net effect with the addq above: rdx advances by 16
	pxor .Lk_s63(%rip),%xmm0
	call _vpaes_schedule_transform
	movdqu %xmm0,(%rdx)  # Final entry of the output array

# Zero xmm0-xmm7; presumably so that no key-derived data is left behind in
# registers (intent not stated in this file).
	pxor %xmm0,%xmm0
	pxor %xmm1,%xmm1
	pxor %xmm2,%xmm2
	pxor %xmm3,%xmm3
	pxor %xmm4,%xmm4
	pxor %xmm5,%xmm5
	pxor %xmm6,%xmm6
	pxor %xmm7,%xmm7
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_schedule_core,.-_vpaes_schedule_core

# _vpaes_schedule_round: one key-schedule round.
#
# Inputs:   xmm0 = current input block
#           xmm7 = previous key block
#           xmm8 = rotating constant (initialised from .Lk_rcon)
#           xmm9-xmm13 = constants loaded by _vpaes_preheat
# Outputs:  xmm0 = xmm7 = new key block; xmm8 rotated by one byte
# Clobbers: xmm1-xmm4
#
# _vpaes_schedule_low_round is a second entry point that skips the xmm8
# handling and the rotation of xmm0; it has no .type or .size directive of
# its own and shares the epilogue below.
.type _vpaes_schedule_round,@function
.align 16
_vpaes_schedule_round:
.cfi_startproc

	pxor %xmm1,%xmm1
.byte 102,65,15,58,15,200,15  # palignr $15,%xmm8,%xmm1: byte 0 = xmm8 byte 15, rest zero
.byte 102,69,15,58,15,192,15  # palignr $15,%xmm8,%xmm8: rotate xmm8 up by one byte
	pxor %xmm1,%xmm7

	pshufd $0xFF,%xmm0,%xmm0  # Broadcast the top dword of xmm0 to all lanes
.byte 102,15,58,15,192,1  # palignr $1,%xmm0,%xmm0: rotate xmm0 down by one byte

_vpaes_schedule_low_round:
# Fold xmm7 into itself with byte shifts of 4 and then 8, then xor .Lk_s63.
	movdqa %xmm7,%xmm1
	pslldq $4,%xmm7
	pxor %xmm1,%xmm7
	movdqa %xmm7,%xmm1
	pslldq $8,%xmm7
	pxor %xmm1,%xmm7
	pxor .Lk_s63(%rip),%xmm7

# Nibble-split table lookups on xmm0, using the same .Lk_inv and .Lk_sb1
# registers as the encrypt core.
	movdqa %xmm9,%xmm1
	pandn %xmm0,%xmm1
	psrld $4,%xmm1  # xmm1 = high nibbles
	pand %xmm9,%xmm0  # xmm0 = low nibbles
	movdqa %xmm11,%xmm2
.byte 102,15,56,0,208  # pshufb %xmm0,%xmm2
	pxor %xmm1,%xmm0
	movdqa %xmm10,%xmm3
.byte 102,15,56,0,217  # pshufb %xmm1,%xmm3
	pxor %xmm2,%xmm3
	movdqa %xmm10,%xmm4
.byte 102,15,56,0,224  # pshufb %xmm0,%xmm4
	pxor %xmm2,%xmm4
	movdqa %xmm10,%xmm2
.byte 102,15,56,0,211  # pshufb %xmm3,%xmm2
	pxor %xmm0,%xmm2
	movdqa %xmm10,%xmm3
.byte 102,15,56,0,220  # pshufb %xmm4,%xmm3
	pxor %xmm1,%xmm3
	movdqa %xmm13,%xmm4
.byte 102,15,56,0,226  # pshufb %xmm2,%xmm4
	movdqa %xmm12,%xmm0
.byte 102,15,56,0,195  # pshufb %xmm3,%xmm0
	pxor %xmm4,%xmm0

# Combine with the folded previous block and publish in both xmm0 and xmm7.
	pxor %xmm7,%xmm0
	movdqa %xmm0,%xmm7
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_schedule_round,.-_vpaes_schedule_round

# _vpaes_schedule_transform: byte-wise transform of xmm0 through a pair of
# 16-entry tables.
#
# Inputs:   xmm0 = value to transform
#           r11  = address of two consecutive 16-byte tables; the first is
#                  indexed by the low nibble of each byte, the second by the
#                  high nibble
#           xmm9 = 0x0F byte mask (loaded by _vpaes_preheat)
# Output:   xmm0 = first_table[low nibble] xor second_table[high nibble]
# Clobbers: xmm1, xmm2
.type _vpaes_schedule_transform,@function
.align 16
_vpaes_schedule_transform:
.cfi_startproc
	movdqa %xmm9,%xmm1
	pandn %xmm0,%xmm1
	psrld $4,%xmm1  # xmm1 = high nibbles
	pand %xmm9,%xmm0  # xmm0 = low nibbles
	movdqa (%r11),%xmm2
.byte 102,15,56,0,208  # pshufb %xmm0,%xmm2
	movdqa 16(%r11),%xmm0
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0
	pxor %xmm2,%xmm0
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_schedule_transform,.-_vpaes_schedule_transform

# _vpaes_schedule_mangle: derive the stored form of the key block in xmm0
# and write it to the next slot of the output array.
#
# Inputs:   xmm0 = key block (left unchanged)
#           rdx  = address of the previously written entry
#           r8   = current row offset into .Lk_sr
#           r10  = address of .Lk_sr
# Effects:  rdx += 16; 16 bytes stored at the new rdx; r8 = (r8 - 16) & 0x30
# Clobbers: xmm1, xmm3, xmm4, xmm5, flags
.type _vpaes_schedule_mangle,@function
.align 16
_vpaes_schedule_mangle:
.cfi_startproc
	movdqa %xmm0,%xmm4  # Work on a copy so xmm0 survives
	movdqa .Lk_mc_forward(%rip),%xmm5  # First row of .Lk_mc_forward

	addq $16,%rdx
	pxor .Lk_s63(%rip),%xmm4
# xmm3 accumulates the xor of the copy permuted once, twice and three times
# by the .Lk_mc_forward row in xmm5.
.byte 102,15,56,0,229  # pshufb %xmm5,%xmm4
	movdqa %xmm4,%xmm3
.byte 102,15,56,0,229  # pshufb %xmm5,%xmm4
	pxor %xmm4,%xmm3
.byte 102,15,56,0,229  # pshufb %xmm5,%xmm4
	pxor %xmm4,%xmm3

# This label is not referenced by any code visible in this file.
.Lschedule_mangle_both:
	movdqa (%r8,%r10,1),%xmm1  # Row r8 of .Lk_sr
.byte 102,15,56,0,217  # pshufb %xmm1,%xmm3
	addq $-16,%r8
	andq $0x30,%r8  # Step r8 backwards through 0x30,0x20,0x10,0x00
	movdqu %xmm3,(%rdx)
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_schedule_mangle,.-_vpaes_schedule_mangle

# GFp_vpaes_set_encrypt_key: build an encryption key schedule.
#
# Register arguments as read by the code:
#   rdi = user key bytes
#   esi = key size in bits
#   rdx = key structure: round keys from offset 0, 32-bit loop count at
#         offset 240
# Returns eax = 0 unconditionally.
# NOTE(review): the C prototype is not visible in this file; confirm the
# parameter types against the header.
.globl GFp_vpaes_set_encrypt_key
.hidden GFp_vpaes_set_encrypt_key
.type GFp_vpaes_set_encrypt_key,@function
.align 16
GFp_vpaes_set_encrypt_key:
.cfi_startproc
#ifdef BORINGSSL_DISPATCH_TEST
.extern BORINGSSL_function_hit
.hidden BORINGSSL_function_hit
	movb $1,BORINGSSL_function_hit+5(%rip)
#endif

	movl %esi,%eax
	shrl $5,%eax
	addl $5,%eax  # Loop count = bits/32 + 5 (9 for 128 bits, 13 for 256 bits)
	movl %eax,240(%rdx)

# NOTE(review): ecx is not read by _vpaes_schedule_core or its helpers in
# this file.
	movl $0,%ecx
	movl $0x30,%r8d  # Initial row offset into .Lk_sr
	call _vpaes_schedule_core
	xorl %eax,%eax
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size GFp_vpaes_set_encrypt_key,.-GFp_vpaes_set_encrypt_key

# GFp_vpaes_encrypt: encrypt a single 16-byte block.
#
# Register arguments as read by the code:
#   rdi = input block (16 bytes, unaligned load)
#   rsi = output block (16 bytes, unaligned store)
#   rdx = key schedule, passed through to _vpaes_encrypt_core
# Clobbers xmm0-xmm5 and xmm9-xmm15 plus rax, r9, r10, r11 via the helpers.
.globl GFp_vpaes_encrypt
.hidden GFp_vpaes_encrypt
.type GFp_vpaes_encrypt,@function
.align 16
GFp_vpaes_encrypt:
.cfi_startproc
	movdqu (%rdi),%xmm0
	call _vpaes_preheat
	call _vpaes_encrypt_core
	movdqu %xmm0,(%rsi)
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size GFp_vpaes_encrypt,.-GFp_vpaes_encrypt

# GFp_vpaes_ctr32_encrypt_blocks: counter-mode encryption of whole blocks.
#
# Register arguments as read by the code:
#   rdi = input, rsi = output, rdx = number of 16-byte blocks,
#   rcx = key schedule, r8 = 16-byte initial counter block
# NOTE(review): the C prototype is not visible in this file; confirm the
# parameter types against the header.
#
# Behaviour:
#   * Returns immediately when the block count is zero.
#   * Bytes 12-15 of the counter block act as a big-endian 32-bit counter:
#     .Lrev_ctr reverses exactly those four bytes and the paddd constants
#     add to the top dword lane only, so the counter wraps modulo 2^32
#     without carrying into the other twelve bytes.
#   * An odd block count is handled by encrypting one block first; the rest
#     are processed two at a time by _vpaes_encrypt_core_2x.
#   * The counter block at r8 is only read, never written back.
.globl GFp_vpaes_ctr32_encrypt_blocks
.hidden GFp_vpaes_ctr32_encrypt_blocks
.type GFp_vpaes_ctr32_encrypt_blocks,@function
.align 16
GFp_vpaes_ctr32_encrypt_blocks:
.cfi_startproc

	xchgq %rcx,%rdx  # rcx = block count, rdx = key (where the cores expect it)
	testq %rcx,%rcx
	jz .Lctr32_abort
	movdqu (%r8),%xmm0  # Counter block exactly as supplied
	movdqa .Lctr_add_one(%rip),%xmm8
	subq %rdi,%rsi  # rsi = out - in, so (%rsi,%rdi,1) addresses the output
	call _vpaes_preheat
	movdqa %xmm0,%xmm6
	pshufb .Lrev_ctr(%rip),%xmm6  # xmm6 = counter block with bytes 12-15 reversed

	testq $1,%rcx
	jz .Lctr32_prep_loop

# Odd count: process one block on its own. xmm0 still holds the counter
# block in its original byte order.
	movdqu (%rdi),%xmm7
	call _vpaes_encrypt_core
	pxor %xmm7,%xmm0  # Keystream xor input
	paddd %xmm8,%xmm6  # Counter += 1
	movdqu %xmm0,(%rsi,%rdi,1)
	subq $1,%rcx
	leaq 16(%rdi),%rdi  # leaq keeps the flags from subq for the jz below
	jz .Lctr32_done

.Lctr32_prep_loop:
# xmm14 and xmm15 hold the two counters of each pair in reversed form.
	movdqa %xmm6,%xmm14
	movdqa %xmm6,%xmm15
	paddd %xmm8,%xmm15  # xmm15 = xmm14 + 1

.Lctr32_loop:
	movdqa .Lrev_ctr(%rip),%xmm1
	movdqa %xmm14,%xmm0
	movdqa %xmm15,%xmm6
.byte 102,15,56,0,193  # pshufb %xmm1,%xmm0: back to original byte order
.byte 102,15,56,0,241  # pshufb %xmm1,%xmm6
	call _vpaes_encrypt_core_2x
	movdqu (%rdi),%xmm1
	movdqu 16(%rdi),%xmm2
	movdqa .Lctr_add_two(%rip),%xmm3
	pxor %xmm1,%xmm0  # Keystream xor input, first block
	pxor %xmm2,%xmm6  # Keystream xor input, second block
	paddd %xmm3,%xmm14  # Both counters += 2
	paddd %xmm3,%xmm15
	movdqu %xmm0,(%rsi,%rdi,1)
	movdqu %xmm6,16(%rsi,%rdi,1)
	subq $2,%rcx
	leaq 32(%rdi),%rdi  # leaq keeps the flags from subq for the jnz below
	jnz .Lctr32_loop

.Lctr32_done:
.Lctr32_abort:
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size GFp_vpaes_ctr32_encrypt_blocks,.-GFp_vpaes_ctr32_encrypt_blocks

# _vpaes_preheat: load the constants shared by the encrypt and schedule
# routines into xmm9-xmm15. All loads are displacements from .Lk_s0F, so
# they depend on the table layout in _vpaes_consts.
#
# Outputs:  xmm9  = .Lk_s0F       xmm10 = .Lk_inv       xmm11 = .Lk_inv+16
#           xmm13 = .Lk_sb1       xmm12 = .Lk_sb1+16
#           xmm15 = .Lk_sb2       xmm14 = .Lk_sb2+16
# Clobbers: r10
.type _vpaes_preheat,@function
.align 16
_vpaes_preheat:
.cfi_startproc
	leaq .Lk_s0F(%rip),%r10
	movdqa -32(%r10),%xmm10
	movdqa -16(%r10),%xmm11
	movdqa 0(%r10),%xmm9
	movdqa 48(%r10),%xmm13
	movdqa 64(%r10),%xmm12
	movdqa 80(%r10),%xmm15
	movdqa 96(%r10),%xmm14
	.byte 0xf3,0xc3  # rep ret
.cfi_endproc
.size _vpaes_preheat,.-_vpaes_preheat

# Constant tables. No section directive precedes them, so they are emitted
# in .text. The code reaches several tables by fixed displacement from a
# neighbouring label (for example -32(%r10) and 48(%r10) from .Lk_s0F,
# -96(%r10) and 64(%r11,%r10,1) from .Lk_mc_backward), so the order and
# sizes of the entries from .Lk_inv through .Lk_sr must not change. Every
# entry is a multiple of 16 bytes, which keeps each label 16-byte aligned
# for the movdqa and pshufb memory operands.
.type _vpaes_consts,@object
.align 64
_vpaes_consts:
.Lk_inv:
.quad 0x0E05060F0D080180, 0x040703090A0B0C02
.quad 0x01040A060F0B0780, 0x030D0E0C02050809

# Low-nibble mask applied to every byte.
.Lk_s0F:
.quad 0x0F0F0F0F0F0F0F0F, 0x0F0F0F0F0F0F0F0F

.Lk_ipt:
.quad 0xC2B2E8985A2A7000, 0xCABAE09052227808
.quad 0x4C01307D317C4D00, 0xCD80B1FCB0FDCC81

.Lk_sb1:
.quad 0xB19BE18FCB503E00, 0xA5DF7A6E142AF544
.quad 0x3618D415FAE22300, 0x3BF7CCC10D2ED9EF
.Lk_sb2:
.quad 0xE27A93C60B712400, 0x5EB7E955BC982FCD
.quad 0x69EB88400AE12900, 0xC2A163C8AB82234A
.Lk_sbo:
.quad 0xD0D26D176FBDC700, 0x15AABF7AC502A878
.quad 0xCFE474A55FBB6A00, 0x8E1E90D1412B35FA

# Four 16-byte pshufb masks, selected by the row offset in r11.
.Lk_mc_forward:
.quad 0x0407060500030201, 0x0C0F0E0D080B0A09
.quad 0x080B0A0904070605, 0x000302010C0F0E0D
.quad 0x0C0F0E0D080B0A09, 0x0407060500030201
.quad 0x000302010C0F0E0D, 0x080B0A0904070605

# Four 16-byte pshufb masks, selected by the row offset in r11.
.Lk_mc_backward:
.quad 0x0605040702010003, 0x0E0D0C0F0A09080B
.quad 0x020100030E0D0C0F, 0x0A09080B06050407
.quad 0x0E0D0C0F0A09080B, 0x0605040702010003
.quad 0x0A09080B06050407, 0x020100030E0D0C0F

# Four 16-byte pshufb masks, selected by r11 (encrypt) or r8 (schedule).
.Lk_sr:
.quad 0x0706050403020100, 0x0F0E0D0C0B0A0908
.quad 0x030E09040F0A0500, 0x0B06010C07020D08
.quad 0x0F060D040B020900, 0x070E050C030A0108
.quad 0x0B0E0104070A0D00, 0x0306090C0F020508

# Initial value of xmm8 in _vpaes_schedule_core.
.Lk_rcon:
.quad 0x1F8391B9AF9DEEB6, 0x702A98084D7C7D81

.Lk_s63:
.quad 0x5B5B5B5B5B5B5B5B, 0x5B5B5B5B5B5B5B5B

.Lk_opt:
.quad 0xFF9F4929D6B66000, 0xF7974121DEBE6808
.quad 0x01EDBD5150BCEC00, 0xE10D5DB1B05C0CE0

.Lk_deskew:
.quad 0x07E4A34047A4E300, 0x1DFEB95A5DBEF91A
.quad 0x5F36B5DC83EA6900, 0x2841C2ABF49D1E77

# Mask for pshufb: bytes 0-11 stay in place, bytes 12-15 are reversed.
# Applying it twice restores the original order.
.Lrev_ctr:
.quad 0x0706050403020100, 0x0c0d0e0f0b0a0908

# Addends for paddd: 1 and 2 in the top dword lane, zero elsewhere.
.Lctr_add_one:
.quad 0x0000000000000000, 0x0000000100000000
.Lctr_add_two:
.quad 0x0000000000000000, 0x0000000200000000

# NUL-terminated ASCII banner: Vector Permutation AES for x86_64/SSSE3,
# Mike Hamburg (Stanford University)
.byte 86,101,99,116,111,114,32,80,101,114,109,117,116,97,116,105,111,110,32,65,69,83,32,102,111,114,32,120,56,54,95,54,52,47,83,83,83,69,51,44,32,77,105,107,101,32,72,97,109,98,117,114,103,32,40,83,116,97,110,102,111,114,100,32,85,110,105,118,101,114,115,105,116,121,41,0
.align 64
.size _vpaes_consts,.-_vpaes_consts
#endif
.section .note.GNU-stack,"",@progbits