#if defined(__x86_64__)
/*
 * AES-NI primitives, x86-64, AT&T syntax.
 *
 * This text was recovered from a collapsed listing: the viewer line numbers
 * that were fused onto the tokens have been dropped and each statement is
 * back on its own line.  Instruction order and encodings are unchanged.
 *
 * Several instructions are emitted as raw bytes.  Decoding used below:
 *   102,15,56,220,M      aesenc      (66 0F 38 DC /r)
 *   102,15,56,221,M      aesenclast  (66 0F 38 DD /r)
 *   102,15,56,222,M      aesdec      (66 0F 38 DE /r)
 *   102,15,56,223,M      aesdeclast  (66 0F 38 DF /r)
 *   102,15,56,0,M        pshufb      (66 0F 38 00 /r)
 *   102,68,15,56,...     same opcodes with a REX.R prefix (dest is xmm8/xmm9)
 *   0x0f,0x1f,0x00       nopl (%rax)
 *   0xf3,0xc3            rep ret
 * ModRM byte M: 209,217,225,233,241,249 = source xmm1, dest xmm2..xmm7;
 *               208,216,224,232,240,248 = source xmm0, dest xmm2..xmm7;
 *               with REX.R: 193,201 = source xmm1, dest xmm8,xmm9;
 *                           192,200 = source xmm0, dest xmm8,xmm9.
 */
.text

/*
 * _aesni_encrypt
 * In:   %rdi = 16-byte input block, %rsi = 16-byte output block,
 *       %rdx = key material; the loop count is read from 240(%rdx).
 * Does: block ^= 0(%rdx); one aesenc per loop pass with successive 16-byte
 *       entries starting at 16(%rdx); a final aesenclast; store to (%rsi).
 * Clobbers %eax, %rdx, flags; xmm0-xmm2 are zeroed before returning.
 */
.globl	_aesni_encrypt
.private_extern	_aesni_encrypt

.p2align	4
_aesni_encrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
L$oop_enc1_1:
	.byte	102,15,56,220,209
	decl	%eax			/* sets ZF for the jnz; movups/leaq leave flags alone */
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	L$oop_enc1_1
	.byte	102,15,56,221,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3

/*
 * _aesni_decrypt
 * Same register interface and structure as _aesni_encrypt, using
 * aesdec / aesdeclast.
 */
.globl	_aesni_decrypt
.private_extern	_aesni_decrypt

.p2align	4
_aesni_decrypt:
	movups	(%rdi),%xmm2
	movl	240(%rdx),%eax
	movups	(%rdx),%xmm0
	movups	16(%rdx),%xmm1
	leaq	32(%rdx),%rdx
	xorps	%xmm0,%xmm2
L$oop_dec1_2:
	.byte	102,15,56,222,209
	decl	%eax
	movups	(%rdx),%xmm1
	leaq	16(%rdx),%rdx
	jnz	L$oop_dec1_2
	.byte	102,15,56,223,209
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	.byte	0xf3,0xc3

/*
 * _aesni_encryptN / _aesni_decryptN (N = 2, 3, 4, 6, 8) -- register-only
 * helpers, no .globl in this section.
 * In:   %rcx = key material, %eax = count (the caller loads it from
 *       240(key)), data blocks in xmm2 .. xmm(N+1).
 * Out:  processed blocks in the same xmm registers.
 * Clobbers %rax, %rcx, xmm0, xmm1, flags.
 * %eax is scaled by 16, %rcx is moved to 32(key)+16*count, and %rax then
 * runs as a negative offset up to zero, so the `addq $32,%rax` doubles as
 * the loop test.  Each pass applies two 16-byte entries, alternating xmm1
 * and xmm0.
 */
.p2align	4
_aesni_encrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop2:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop2

	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216
	.byte	0xf3,0xc3


.p2align	4
_aesni_decrypt2:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$dec_loop2:
	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208
	.byte	102,15,56,222,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop2

	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,223,208
	.byte	102,15,56,223,216
	.byte	0xf3,0xc3


.p2align	4
_aesni_encrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$enc_loop3:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	.byte	102,15,56,220,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop3

	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216
	.byte	102,15,56,221,224
	.byte	0xf3,0xc3


.p2align	4
_aesni_decrypt3:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	addq	$16,%rax

L$dec_loop3:
	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208
	.byte	102,15,56,222,216
	.byte	102,15,56,222,224
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop3

	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	.byte	102,15,56,223,208
	.byte	102,15,56,223,216
	.byte	102,15,56,223,224
	.byte	0xf3,0xc3


.p2align	4
_aesni_encrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	0x0f,0x1f,0x00
	addq	$16,%rax

L$enc_loop4:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	.byte	102,15,56,220,224
	.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop4

	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	.byte	102,15,56,220,233
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216
	.byte	102,15,56,221,224
	.byte	102,15,56,221,232
	.byte	0xf3,0xc3


.p2align	4
_aesni_decrypt4:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	xorps	%xmm0,%xmm4
	xorps	%xmm0,%xmm5
	movups	32(%rcx),%xmm0
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	0x0f,0x1f,0x00
	addq	$16,%rax

L$dec_loop4:
	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	.byte	102,15,56,222,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208
	.byte	102,15,56,222,216
	.byte	102,15,56,222,224
	.byte	102,15,56,222,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop4

	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	.byte	102,15,56,222,233
	.byte	102,15,56,223,208
	.byte	102,15,56,223,216
	.byte	102,15,56,223,224
	.byte	102,15,56,223,232
	.byte	0xf3,0xc3

/*
 * The 6- and 8-block variants interleave the first aesenc/aesdec steps with
 * the initial whitening XORs and jump into the middle of the loop body.
 */
.p2align	4
_aesni_encrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	.byte	102,15,56,220,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,220,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	.byte	102,15,56,220,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop6_enter
.p2align	4
L$enc_loop6:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
L$enc_loop6_enter:
	.byte	102,15,56,220,233
	.byte	102,15,56,220,241
	.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	.byte	102,15,56,220,224
	.byte	102,15,56,220,232
	.byte	102,15,56,220,240
	.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop6

	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	.byte	102,15,56,220,233
	.byte	102,15,56,220,241
	.byte	102,15,56,220,249
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216
	.byte	102,15,56,221,224
	.byte	102,15,56,221,232
	.byte	102,15,56,221,240
	.byte	102,15,56,221,248
	.byte	0xf3,0xc3


.p2align	4
_aesni_decrypt6:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	pxor	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	.byte	102,15,56,222,209
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,222,217
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	.byte	102,15,56,222,225
	pxor	%xmm0,%xmm7
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$dec_loop6_enter
.p2align	4
L$dec_loop6:
	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
L$dec_loop6_enter:
	.byte	102,15,56,222,233
	.byte	102,15,56,222,241
	.byte	102,15,56,222,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208
	.byte	102,15,56,222,216
	.byte	102,15,56,222,224
	.byte	102,15,56,222,232
	.byte	102,15,56,222,240
	.byte	102,15,56,222,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop6

	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	.byte	102,15,56,222,233
	.byte	102,15,56,222,241
	.byte	102,15,56,222,249
	.byte	102,15,56,223,208
	.byte	102,15,56,223,216
	.byte	102,15,56,223,224
	.byte	102,15,56,223,232
	.byte	102,15,56,223,240
	.byte	102,15,56,223,248
	.byte	0xf3,0xc3


.p2align	4
_aesni_encrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,220,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
	.byte	102,15,56,220,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$enc_loop8_inner
.p2align	4
L$enc_loop8:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
L$enc_loop8_inner:
	.byte	102,15,56,220,225
	.byte	102,15,56,220,233
	.byte	102,15,56,220,241
	.byte	102,15,56,220,249
	.byte	102,68,15,56,220,193
	.byte	102,68,15,56,220,201
L$enc_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	.byte	102,15,56,220,224
	.byte	102,15,56,220,232
	.byte	102,15,56,220,240
	.byte	102,15,56,220,248
	.byte	102,68,15,56,220,192
	.byte	102,68,15,56,220,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$enc_loop8

	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	.byte	102,15,56,220,225
	.byte	102,15,56,220,233
	.byte	102,15,56,220,241
	.byte	102,15,56,220,249
	.byte	102,68,15,56,220,193
	.byte	102,68,15,56,220,201
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216
	.byte	102,15,56,221,224
	.byte	102,15,56,221,232
	.byte	102,15,56,221,240
	.byte	102,15,56,221,248
	.byte	102,68,15,56,221,192
	.byte	102,68,15,56,221,200
	.byte	0xf3,0xc3


.p2align	4
_aesni_decrypt8:
	movups	(%rcx),%xmm0
	shll	$4,%eax
	movups	16(%rcx),%xmm1
	xorps	%xmm0,%xmm2
	xorps	%xmm0,%xmm3
	pxor	%xmm0,%xmm4
	pxor	%xmm0,%xmm5
	pxor	%xmm0,%xmm6
	leaq	32(%rcx,%rax,1),%rcx
	negq	%rax
	.byte	102,15,56,222,209
	pxor	%xmm0,%xmm7
	pxor	%xmm0,%xmm8
	.byte	102,15,56,222,217
	pxor	%xmm0,%xmm9
	movups	(%rcx,%rax,1),%xmm0
	addq	$16,%rax
	jmp	L$dec_loop8_inner
.p2align	4
L$dec_loop8:
	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
L$dec_loop8_inner:
	.byte	102,15,56,222,225
	.byte	102,15,56,222,233
	.byte	102,15,56,222,241
	.byte	102,15,56,222,249
	.byte	102,68,15,56,222,193
	.byte	102,68,15,56,222,201
L$dec_loop8_enter:
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,222,208
	.byte	102,15,56,222,216
	.byte	102,15,56,222,224
	.byte	102,15,56,222,232
	.byte	102,15,56,222,240
	.byte	102,15,56,222,248
	.byte	102,68,15,56,222,192
	.byte	102,68,15,56,222,200
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$dec_loop8

	.byte	102,15,56,222,209
	.byte	102,15,56,222,217
	.byte	102,15,56,222,225
	.byte	102,15,56,222,233
	.byte	102,15,56,222,241
	.byte	102,15,56,222,249
	.byte	102,68,15,56,222,193
	.byte	102,68,15,56,222,201
	.byte	102,15,56,223,208
	.byte	102,15,56,223,216
	.byte	102,15,56,223,224
	.byte	102,15,56,223,232
	.byte	102,15,56,223,240
	.byte	102,15,56,223,248
	.byte	102,68,15,56,223,192
	.byte	102,68,15,56,223,200
	.byte	0xf3,0xc3

/*
 * _aesni_ecb_encrypt
 * In:   %rdi = input, %rsi = output, %rdx = byte length (rounded down to a
 *       multiple of 16; returns at once if that is zero), %rcx = key
 *       material (count at 240(%rcx)), %r8d = nonzero for the encrypt path,
 *       zero for the decrypt path.
 * Does: processes 128 bytes (8 blocks) per pass through the bulk loop, then
 *       dispatches the 16..112-byte remainder to the 1/2/3/4/6/8-block
 *       helpers.  Five and seven blocks reuse the 6- and 8-block helpers
 *       with the unused register zeroed first.
 * %r11 / %r10d keep the key pointer and count, because the helpers clobber
 * %rcx / %eax.
 * Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, xmm0-xmm9, flags.
 * The decrypt path zeroes each data register after storing it.
 */
.globl	_aesni_ecb_encrypt
.private_extern	_aesni_ecb_encrypt

.p2align	4
_aesni_ecb_encrypt:
	andq	$-16,%rdx
	jz	L$ecb_ret

	movl	240(%rcx),%eax
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%eax,%r10d
	testl	%r8d,%r8d
	jz	L$ecb_decrypt

	cmpq	$128,%rdx
	jb	L$ecb_enc_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	L$ecb_enc_loop8_enter
.p2align	4
L$ecb_enc_loop8:
	/* store the previous 8 results while loading the next 8 inputs */
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
L$ecb_enc_loop8_enter:

	call	_aesni_encrypt8

	subq	$128,%rdx
	jnc	L$ecb_enc_loop8

	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	addq	$128,%rdx		/* undo the last subq; zero means no remainder */
	jz	L$ecb_ret

L$ecb_enc_tail:
	/* %rdx is 16..112 here; each cmpq feeds both the jb and the later je */
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	L$ecb_enc_one
	movups	16(%rdi),%xmm3
	je	L$ecb_enc_two
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	L$ecb_enc_three
	movups	48(%rdi),%xmm5
	je	L$ecb_enc_four
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	L$ecb_enc_five
	movups	80(%rdi),%xmm7
	je	L$ecb_enc_six
	movdqu	96(%rdi),%xmm8
	xorps	%xmm9,%xmm9
	call	_aesni_encrypt8
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	movups	%xmm8,96(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_3:
	.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_3
	.byte	102,15,56,221,209
	movups	%xmm2,(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_two:
	call	_aesni_encrypt2
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_three:
	call	_aesni_encrypt3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_four:
	call	_aesni_encrypt4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_five:
	xorps	%xmm7,%xmm7
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	jmp	L$ecb_ret
.p2align	4
L$ecb_enc_six:
	call	_aesni_encrypt6
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	jmp	L$ecb_ret

.p2align	4
L$ecb_decrypt:
	cmpq	$128,%rdx
	jb	L$ecb_dec_tail

	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	movdqu	96(%rdi),%xmm8
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
	subq	$128,%rdx
	jmp	L$ecb_dec_loop8_enter
.p2align	4
L$ecb_dec_loop8:
	movups	%xmm2,(%rsi)
	movq	%r11,%rcx
	movdqu	(%rdi),%xmm2
	movl	%r10d,%eax
	movups	%xmm3,16(%rsi)
	movdqu	16(%rdi),%xmm3
	movups	%xmm4,32(%rsi)
	movdqu	32(%rdi),%xmm4
	movups	%xmm5,48(%rsi)
	movdqu	48(%rdi),%xmm5
	movups	%xmm6,64(%rsi)
	movdqu	64(%rdi),%xmm6
	movups	%xmm7,80(%rsi)
	movdqu	80(%rdi),%xmm7
	movups	%xmm8,96(%rsi)
	movdqu	96(%rdi),%xmm8
	movups	%xmm9,112(%rsi)
	leaq	128(%rsi),%rsi
	movdqu	112(%rdi),%xmm9
	leaq	128(%rdi),%rdi
L$ecb_dec_loop8_enter:

	call	_aesni_decrypt8

	movups	(%r11),%xmm0
	subq	$128,%rdx
	jnc	L$ecb_dec_loop8

	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movq	%r11,%rcx
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movl	%r10d,%eax
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	movups	%xmm9,112(%rsi)
	pxor	%xmm9,%xmm9
	leaq	128(%rsi),%rsi
	addq	$128,%rdx
	jz	L$ecb_ret

L$ecb_dec_tail:
	movups	(%rdi),%xmm2
	cmpq	$32,%rdx
	jb	L$ecb_dec_one
	movups	16(%rdi),%xmm3
	je	L$ecb_dec_two
	movups	32(%rdi),%xmm4
	cmpq	$64,%rdx
	jb	L$ecb_dec_three
	movups	48(%rdi),%xmm5
	je	L$ecb_dec_four
	movups	64(%rdi),%xmm6
	cmpq	$96,%rdx
	jb	L$ecb_dec_five
	movups	80(%rdi),%xmm7
	je	L$ecb_dec_six
	movups	96(%rdi),%xmm8
	movups	(%rcx),%xmm0
	xorps	%xmm9,%xmm9
	call	_aesni_decrypt8
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7
	movups	%xmm8,96(%rsi)
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_one:
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_4:
	.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_4
	.byte	102,15,56,223,209
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_two:
	call	_aesni_decrypt2
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_three:
	call	_aesni_decrypt3
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_four:
	call	_aesni_decrypt4
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_five:
	xorps	%xmm7,%xmm7
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	jmp	L$ecb_ret
.p2align	4
L$ecb_dec_six:
	call	_aesni_decrypt6
	movups	%xmm2,(%rsi)
	pxor	%xmm2,%xmm2
	movups	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movups	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movups	%xmm5,48(%rsi)
	pxor	%xmm5,%xmm5
	movups	%xmm6,64(%rsi)
	pxor	%xmm6,%xmm6
	movups	%xmm7,80(%rsi)
	pxor	%xmm7,%xmm7

L$ecb_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	.byte	0xf3,0xc3

/*
 * _aesni_ccm64_encrypt_blocks
 * In:   %rdi = input, %rsi = output, %rdx = number of 16-byte blocks
 *       (decremented once per pass; the loop has no entry check for zero),
 *       %rcx = key material (count at 240(%rcx)),
 *       %r8  = 16 bytes loaded into xmm6, byte-shuffled with L$bswap_mask
 *              and advanced by L$increment64 each pass,
 *       %r9  = 16 bytes loaded into xmm3, updated each pass and written
 *              back to (%r9) on exit.
 * Per pass: xmm2 and xmm3 ^ input block go through the same aesenc sequence
 * side by side; the xmm2 result is XORed with the input block and stored to
 * (%rsi).
 * NOTE(review): by the function name, xmm6 is presumably the CCM counter
 * block and xmm3 the running CBC-MAC -- confirm against the caller.
 * L$increment64 and L$bswap_mask are defined outside this section.
 * Clobbers %rax, %rcx, %rdx, %rsi, %rdi, %r10, %r11, xmm7, xmm9, flags;
 * xmm0-xmm3, xmm6 and xmm8 are zeroed before returning.
 */
.globl	_aesni_ccm64_encrypt_blocks
.private_extern	_aesni_ccm64_encrypt_blocks

.p2align	4
_aesni_ccm64_encrypt_blocks:
	movl	240(%rcx),%eax
	movdqu	(%r8),%xmm6
	movdqa	L$increment64(%rip),%xmm9
	movdqa	L$bswap_mask(%rip),%xmm7

	shll	$4,%eax
	movl	$16,%r10d
	leaq	0(%rcx),%r11
	movdqu	(%r9),%xmm3
	movdqa	%xmm6,%xmm2
	leaq	32(%rcx,%rax,1),%rcx
	.byte	102,15,56,0,247
	subq	%rax,%r10
	jmp	L$ccm64_enc_outer
.p2align	4
L$ccm64_enc_outer:
	movups	(%r11),%xmm0
	movq	%r10,%rax
	movups	(%rdi),%xmm8

	xorps	%xmm0,%xmm2
	movups	16(%r11),%xmm1
	xorps	%xmm8,%xmm0
	xorps	%xmm0,%xmm3
	movups	32(%r11),%xmm0

L$ccm64_enc2_loop:
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax
	.byte	102,15,56,220,208
	.byte	102,15,56,220,216
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	L$ccm64_enc2_loop
	.byte	102,15,56,220,209
	.byte	102,15,56,220,217
	paddq	%xmm9,%xmm6
	decq	%rdx			/* ZF is consumed by the jnz below; nothing in between writes flags */
	.byte	102,15,56,221,208
	.byte	102,15,56,221,216

	leaq	16(%rdi),%rdi
	xorps	%xmm2,%xmm8
	movdqa	%xmm6,%xmm2
	movups	%xmm8,(%rsi)
	.byte	102,15,56,0,215
	leaq	16(%rsi),%rsi
	jnz	L$ccm64_enc_outer

	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	movups	%xmm3,(%r9)
	pxor	%xmm3,%xmm3
	pxor	%xmm8,%xmm8
	pxor	%xmm6,%xmm6
	.byte	0xf3,0xc3

/* The operand of this directive is the first token of the next source line. */
.globl
_aesni_ccm64_decrypt_blocks 903.private_extern _aesni_ccm64_decrypt_blocks 904 905.p2align 4 906_aesni_ccm64_decrypt_blocks: 907 movl 240(%rcx),%eax 908 movups (%r8),%xmm6 909 movdqu (%r9),%xmm3 910 movdqa L$increment64(%rip),%xmm9 911 movdqa L$bswap_mask(%rip),%xmm7 912 913 movaps %xmm6,%xmm2 914 movl %eax,%r10d 915 movq %rcx,%r11 916.byte 102,15,56,0,247 917 movups (%rcx),%xmm0 918 movups 16(%rcx),%xmm1 919 leaq 32(%rcx),%rcx 920 xorps %xmm0,%xmm2 921L$oop_enc1_5: 922.byte 102,15,56,220,209 923 decl %eax 924 movups (%rcx),%xmm1 925 leaq 16(%rcx),%rcx 926 jnz L$oop_enc1_5 927.byte 102,15,56,221,209 928 shll $4,%r10d 929 movl $16,%eax 930 movups (%rdi),%xmm8 931 paddq %xmm9,%xmm6 932 leaq 16(%rdi),%rdi 933 subq %r10,%rax 934 leaq 32(%r11,%r10,1),%rcx 935 movq %rax,%r10 936 jmp L$ccm64_dec_outer 937.p2align 4 938L$ccm64_dec_outer: 939 xorps %xmm2,%xmm8 940 movdqa %xmm6,%xmm2 941 movups %xmm8,(%rsi) 942 leaq 16(%rsi),%rsi 943.byte 102,15,56,0,215 944 945 subq $1,%rdx 946 jz L$ccm64_dec_break 947 948 movups (%r11),%xmm0 949 movq %r10,%rax 950 movups 16(%r11),%xmm1 951 xorps %xmm0,%xmm8 952 xorps %xmm0,%xmm2 953 xorps %xmm8,%xmm3 954 movups 32(%r11),%xmm0 955 jmp L$ccm64_dec2_loop 956.p2align 4 957L$ccm64_dec2_loop: 958.byte 102,15,56,220,209 959.byte 102,15,56,220,217 960 movups (%rcx,%rax,1),%xmm1 961 addq $32,%rax 962.byte 102,15,56,220,208 963.byte 102,15,56,220,216 964 movups -16(%rcx,%rax,1),%xmm0 965 jnz L$ccm64_dec2_loop 966 movups (%rdi),%xmm8 967 paddq %xmm9,%xmm6 968.byte 102,15,56,220,209 969.byte 102,15,56,220,217 970.byte 102,15,56,221,208 971.byte 102,15,56,221,216 972 leaq 16(%rdi),%rdi 973 jmp L$ccm64_dec_outer 974 975.p2align 4 976L$ccm64_dec_break: 977 978 movl 240(%r11),%eax 979 movups (%r11),%xmm0 980 movups 16(%r11),%xmm1 981 xorps %xmm0,%xmm8 982 leaq 32(%r11),%r11 983 xorps %xmm8,%xmm3 984L$oop_enc1_6: 985.byte 102,15,56,220,217 986 decl %eax 987 movups (%r11),%xmm1 988 leaq 16(%r11),%r11 989 jnz L$oop_enc1_6 990.byte 102,15,56,221,217 991 pxor 
%xmm0,%xmm0 992 pxor %xmm1,%xmm1 993 pxor %xmm2,%xmm2 994 movups %xmm3,(%r9) 995 pxor %xmm3,%xmm3 996 pxor %xmm8,%xmm8 997 pxor %xmm6,%xmm6 998 .byte 0xf3,0xc3 999 1000.globl _aesni_ctr32_encrypt_blocks 1001.private_extern _aesni_ctr32_encrypt_blocks 1002 1003.p2align 4 1004_aesni_ctr32_encrypt_blocks: 1005 cmpq $1,%rdx 1006 jne L$ctr32_bulk 1007 1008 1009 1010 movups (%r8),%xmm2 1011 movups (%rdi),%xmm3 1012 movl 240(%rcx),%edx 1013 movups (%rcx),%xmm0 1014 movups 16(%rcx),%xmm1 1015 leaq 32(%rcx),%rcx 1016 xorps %xmm0,%xmm2 1017L$oop_enc1_7: 1018.byte 102,15,56,220,209 1019 decl %edx 1020 movups (%rcx),%xmm1 1021 leaq 16(%rcx),%rcx 1022 jnz L$oop_enc1_7 1023.byte 102,15,56,221,209 1024 pxor %xmm0,%xmm0 1025 pxor %xmm1,%xmm1 1026 xorps %xmm3,%xmm2 1027 pxor %xmm3,%xmm3 1028 movups %xmm2,(%rsi) 1029 xorps %xmm2,%xmm2 1030 jmp L$ctr32_epilogue 1031 1032.p2align 4 1033L$ctr32_bulk: 1034 leaq (%rsp),%rax 1035 pushq %rbp 1036 subq $128,%rsp 1037 andq $-16,%rsp 1038 leaq -8(%rax),%rbp 1039 1040 1041 1042 1043 movdqu (%r8),%xmm2 1044 movdqu (%rcx),%xmm0 1045 movl 12(%r8),%r8d 1046 pxor %xmm0,%xmm2 1047 movl 12(%rcx),%r11d 1048 movdqa %xmm2,0(%rsp) 1049 bswapl %r8d 1050 movdqa %xmm2,%xmm3 1051 movdqa %xmm2,%xmm4 1052 movdqa %xmm2,%xmm5 1053 movdqa %xmm2,64(%rsp) 1054 movdqa %xmm2,80(%rsp) 1055 movdqa %xmm2,96(%rsp) 1056 movq %rdx,%r10 1057 movdqa %xmm2,112(%rsp) 1058 1059 leaq 1(%r8),%rax 1060 leaq 2(%r8),%rdx 1061 bswapl %eax 1062 bswapl %edx 1063 xorl %r11d,%eax 1064 xorl %r11d,%edx 1065.byte 102,15,58,34,216,3 1066 leaq 3(%r8),%rax 1067 movdqa %xmm3,16(%rsp) 1068.byte 102,15,58,34,226,3 1069 bswapl %eax 1070 movq %r10,%rdx 1071 leaq 4(%r8),%r10 1072 movdqa %xmm4,32(%rsp) 1073 xorl %r11d,%eax 1074 bswapl %r10d 1075.byte 102,15,58,34,232,3 1076 xorl %r11d,%r10d 1077 movdqa %xmm5,48(%rsp) 1078 leaq 5(%r8),%r9 1079 movl %r10d,64+12(%rsp) 1080 bswapl %r9d 1081 leaq 6(%r8),%r10 1082 movl 240(%rcx),%eax 1083 xorl %r11d,%r9d 1084 bswapl %r10d 1085 movl %r9d,80+12(%rsp) 1086 
xorl %r11d,%r10d 1087 leaq 7(%r8),%r9 1088 movl %r10d,96+12(%rsp) 1089 bswapl %r9d 1090 movl _OPENSSL_ia32cap_P+4(%rip),%r10d 1091 xorl %r11d,%r9d 1092 andl $71303168,%r10d 1093 movl %r9d,112+12(%rsp) 1094 1095 movups 16(%rcx),%xmm1 1096 1097 movdqa 64(%rsp),%xmm6 1098 movdqa 80(%rsp),%xmm7 1099 1100 cmpq $8,%rdx 1101 jb L$ctr32_tail 1102 1103 subq $6,%rdx 1104 cmpl $4194304,%r10d 1105 je L$ctr32_6x 1106 1107 leaq 128(%rcx),%rcx 1108 subq $2,%rdx 1109 jmp L$ctr32_loop8 1110 1111.p2align 4 1112L$ctr32_6x: 1113 shll $4,%eax 1114 movl $48,%r10d 1115 bswapl %r11d 1116 leaq 32(%rcx,%rax,1),%rcx 1117 subq %rax,%r10 1118 jmp L$ctr32_loop6 1119 1120.p2align 4 1121L$ctr32_loop6: 1122 addl $6,%r8d 1123 movups -48(%rcx,%r10,1),%xmm0 1124.byte 102,15,56,220,209 1125 movl %r8d,%eax 1126 xorl %r11d,%eax 1127.byte 102,15,56,220,217 1128.byte 0x0f,0x38,0xf1,0x44,0x24,12 1129 leal 1(%r8),%eax 1130.byte 102,15,56,220,225 1131 xorl %r11d,%eax 1132.byte 0x0f,0x38,0xf1,0x44,0x24,28 1133.byte 102,15,56,220,233 1134 leal 2(%r8),%eax 1135 xorl %r11d,%eax 1136.byte 102,15,56,220,241 1137.byte 0x0f,0x38,0xf1,0x44,0x24,44 1138 leal 3(%r8),%eax 1139.byte 102,15,56,220,249 1140 movups -32(%rcx,%r10,1),%xmm1 1141 xorl %r11d,%eax 1142 1143.byte 102,15,56,220,208 1144.byte 0x0f,0x38,0xf1,0x44,0x24,60 1145 leal 4(%r8),%eax 1146.byte 102,15,56,220,216 1147 xorl %r11d,%eax 1148.byte 0x0f,0x38,0xf1,0x44,0x24,76 1149.byte 102,15,56,220,224 1150 leal 5(%r8),%eax 1151 xorl %r11d,%eax 1152.byte 102,15,56,220,232 1153.byte 0x0f,0x38,0xf1,0x44,0x24,92 1154 movq %r10,%rax 1155.byte 102,15,56,220,240 1156.byte 102,15,56,220,248 1157 movups -16(%rcx,%r10,1),%xmm0 1158 1159 call L$enc_loop6 1160 1161 movdqu (%rdi),%xmm8 1162 movdqu 16(%rdi),%xmm9 1163 movdqu 32(%rdi),%xmm10 1164 movdqu 48(%rdi),%xmm11 1165 movdqu 64(%rdi),%xmm12 1166 movdqu 80(%rdi),%xmm13 1167 leaq 96(%rdi),%rdi 1168 movups -64(%rcx,%r10,1),%xmm1 1169 pxor %xmm2,%xmm8 1170 movaps 0(%rsp),%xmm2 1171 pxor %xmm3,%xmm9 1172 movaps 16(%rsp),%xmm3 
1173 pxor %xmm4,%xmm10 1174 movaps 32(%rsp),%xmm4 1175 pxor %xmm5,%xmm11 1176 movaps 48(%rsp),%xmm5 1177 pxor %xmm6,%xmm12 1178 movaps 64(%rsp),%xmm6 1179 pxor %xmm7,%xmm13 1180 movaps 80(%rsp),%xmm7 1181 movdqu %xmm8,(%rsi) 1182 movdqu %xmm9,16(%rsi) 1183 movdqu %xmm10,32(%rsi) 1184 movdqu %xmm11,48(%rsi) 1185 movdqu %xmm12,64(%rsi) 1186 movdqu %xmm13,80(%rsi) 1187 leaq 96(%rsi),%rsi 1188 1189 subq $6,%rdx 1190 jnc L$ctr32_loop6 1191 1192 addq $6,%rdx 1193 jz L$ctr32_done 1194 1195 leal -48(%r10),%eax 1196 leaq -80(%rcx,%r10,1),%rcx 1197 negl %eax 1198 shrl $4,%eax 1199 jmp L$ctr32_tail 1200 1201.p2align 5 1202L$ctr32_loop8: 1203 addl $8,%r8d 1204 movdqa 96(%rsp),%xmm8 1205.byte 102,15,56,220,209 1206 movl %r8d,%r9d 1207 movdqa 112(%rsp),%xmm9 1208.byte 102,15,56,220,217 1209 bswapl %r9d 1210 movups 32-128(%rcx),%xmm0 1211.byte 102,15,56,220,225 1212 xorl %r11d,%r9d 1213 nop 1214.byte 102,15,56,220,233 1215 movl %r9d,0+12(%rsp) 1216 leaq 1(%r8),%r9 1217.byte 102,15,56,220,241 1218.byte 102,15,56,220,249 1219.byte 102,68,15,56,220,193 1220.byte 102,68,15,56,220,201 1221 movups 48-128(%rcx),%xmm1 1222 bswapl %r9d 1223.byte 102,15,56,220,208 1224.byte 102,15,56,220,216 1225 xorl %r11d,%r9d 1226.byte 0x66,0x90 1227.byte 102,15,56,220,224 1228.byte 102,15,56,220,232 1229 movl %r9d,16+12(%rsp) 1230 leaq 2(%r8),%r9 1231.byte 102,15,56,220,240 1232.byte 102,15,56,220,248 1233.byte 102,68,15,56,220,192 1234.byte 102,68,15,56,220,200 1235 movups 64-128(%rcx),%xmm0 1236 bswapl %r9d 1237.byte 102,15,56,220,209 1238.byte 102,15,56,220,217 1239 xorl %r11d,%r9d 1240.byte 0x66,0x90 1241.byte 102,15,56,220,225 1242.byte 102,15,56,220,233 1243 movl %r9d,32+12(%rsp) 1244 leaq 3(%r8),%r9 1245.byte 102,15,56,220,241 1246.byte 102,15,56,220,249 1247.byte 102,68,15,56,220,193 1248.byte 102,68,15,56,220,201 1249 movups 80-128(%rcx),%xmm1 1250 bswapl %r9d 1251.byte 102,15,56,220,208 1252.byte 102,15,56,220,216 1253 xorl %r11d,%r9d 1254.byte 0x66,0x90 1255.byte 102,15,56,220,224 1256.byte 
102,15,56,220,232 1257 movl %r9d,48+12(%rsp) 1258 leaq 4(%r8),%r9 1259.byte 102,15,56,220,240 1260.byte 102,15,56,220,248 1261.byte 102,68,15,56,220,192 1262.byte 102,68,15,56,220,200 1263 movups 96-128(%rcx),%xmm0 1264 bswapl %r9d 1265.byte 102,15,56,220,209 1266.byte 102,15,56,220,217 1267 xorl %r11d,%r9d 1268.byte 0x66,0x90 1269.byte 102,15,56,220,225 1270.byte 102,15,56,220,233 1271 movl %r9d,64+12(%rsp) 1272 leaq 5(%r8),%r9 1273.byte 102,15,56,220,241 1274.byte 102,15,56,220,249 1275.byte 102,68,15,56,220,193 1276.byte 102,68,15,56,220,201 1277 movups 112-128(%rcx),%xmm1 1278 bswapl %r9d 1279.byte 102,15,56,220,208 1280.byte 102,15,56,220,216 1281 xorl %r11d,%r9d 1282.byte 0x66,0x90 1283.byte 102,15,56,220,224 1284.byte 102,15,56,220,232 1285 movl %r9d,80+12(%rsp) 1286 leaq 6(%r8),%r9 1287.byte 102,15,56,220,240 1288.byte 102,15,56,220,248 1289.byte 102,68,15,56,220,192 1290.byte 102,68,15,56,220,200 1291 movups 128-128(%rcx),%xmm0 1292 bswapl %r9d 1293.byte 102,15,56,220,209 1294.byte 102,15,56,220,217 1295 xorl %r11d,%r9d 1296.byte 0x66,0x90 1297.byte 102,15,56,220,225 1298.byte 102,15,56,220,233 1299 movl %r9d,96+12(%rsp) 1300 leaq 7(%r8),%r9 1301.byte 102,15,56,220,241 1302.byte 102,15,56,220,249 1303.byte 102,68,15,56,220,193 1304.byte 102,68,15,56,220,201 1305 movups 144-128(%rcx),%xmm1 1306 bswapl %r9d 1307.byte 102,15,56,220,208 1308.byte 102,15,56,220,216 1309.byte 102,15,56,220,224 1310 xorl %r11d,%r9d 1311 movdqu 0(%rdi),%xmm10 1312.byte 102,15,56,220,232 1313 movl %r9d,112+12(%rsp) 1314 cmpl $11,%eax 1315.byte 102,15,56,220,240 1316.byte 102,15,56,220,248 1317.byte 102,68,15,56,220,192 1318.byte 102,68,15,56,220,200 1319 movups 160-128(%rcx),%xmm0 1320 1321 jb L$ctr32_enc_done 1322 1323.byte 102,15,56,220,209 1324.byte 102,15,56,220,217 1325.byte 102,15,56,220,225 1326.byte 102,15,56,220,233 1327.byte 102,15,56,220,241 1328.byte 102,15,56,220,249 1329.byte 102,68,15,56,220,193 1330.byte 102,68,15,56,220,201 1331 movups 176-128(%rcx),%xmm1 1332 
1333.byte 102,15,56,220,208 1334.byte 102,15,56,220,216 1335.byte 102,15,56,220,224 1336.byte 102,15,56,220,232 1337.byte 102,15,56,220,240 1338.byte 102,15,56,220,248 1339.byte 102,68,15,56,220,192 1340.byte 102,68,15,56,220,200 1341 movups 192-128(%rcx),%xmm0 1342 je L$ctr32_enc_done 1343 1344.byte 102,15,56,220,209 1345.byte 102,15,56,220,217 1346.byte 102,15,56,220,225 1347.byte 102,15,56,220,233 1348.byte 102,15,56,220,241 1349.byte 102,15,56,220,249 1350.byte 102,68,15,56,220,193 1351.byte 102,68,15,56,220,201 1352 movups 208-128(%rcx),%xmm1 1353 1354.byte 102,15,56,220,208 1355.byte 102,15,56,220,216 1356.byte 102,15,56,220,224 1357.byte 102,15,56,220,232 1358.byte 102,15,56,220,240 1359.byte 102,15,56,220,248 1360.byte 102,68,15,56,220,192 1361.byte 102,68,15,56,220,200 1362 movups 224-128(%rcx),%xmm0 1363 jmp L$ctr32_enc_done 1364 1365.p2align 4 1366L$ctr32_enc_done: 1367 movdqu 16(%rdi),%xmm11 1368 pxor %xmm0,%xmm10 1369 movdqu 32(%rdi),%xmm12 1370 pxor %xmm0,%xmm11 1371 movdqu 48(%rdi),%xmm13 1372 pxor %xmm0,%xmm12 1373 movdqu 64(%rdi),%xmm14 1374 pxor %xmm0,%xmm13 1375 movdqu 80(%rdi),%xmm15 1376 pxor %xmm0,%xmm14 1377 pxor %xmm0,%xmm15 1378.byte 102,15,56,220,209 1379.byte 102,15,56,220,217 1380.byte 102,15,56,220,225 1381.byte 102,15,56,220,233 1382.byte 102,15,56,220,241 1383.byte 102,15,56,220,249 1384.byte 102,68,15,56,220,193 1385.byte 102,68,15,56,220,201 1386 movdqu 96(%rdi),%xmm1 1387 leaq 128(%rdi),%rdi 1388 1389.byte 102,65,15,56,221,210 1390 pxor %xmm0,%xmm1 1391 movdqu 112-128(%rdi),%xmm10 1392.byte 102,65,15,56,221,219 1393 pxor %xmm0,%xmm10 1394 movdqa 0(%rsp),%xmm11 1395.byte 102,65,15,56,221,228 1396.byte 102,65,15,56,221,237 1397 movdqa 16(%rsp),%xmm12 1398 movdqa 32(%rsp),%xmm13 1399.byte 102,65,15,56,221,246 1400.byte 102,65,15,56,221,255 1401 movdqa 48(%rsp),%xmm14 1402 movdqa 64(%rsp),%xmm15 1403.byte 102,68,15,56,221,193 1404 movdqa 80(%rsp),%xmm0 1405 movups 16-128(%rcx),%xmm1 1406.byte 102,69,15,56,221,202 1407 1408 movups 
%xmm2,(%rsi) 1409 movdqa %xmm11,%xmm2 1410 movups %xmm3,16(%rsi) 1411 movdqa %xmm12,%xmm3 1412 movups %xmm4,32(%rsi) 1413 movdqa %xmm13,%xmm4 1414 movups %xmm5,48(%rsi) 1415 movdqa %xmm14,%xmm5 1416 movups %xmm6,64(%rsi) 1417 movdqa %xmm15,%xmm6 1418 movups %xmm7,80(%rsi) 1419 movdqa %xmm0,%xmm7 1420 movups %xmm8,96(%rsi) 1421 movups %xmm9,112(%rsi) 1422 leaq 128(%rsi),%rsi 1423 1424 subq $8,%rdx 1425 jnc L$ctr32_loop8 1426 1427 addq $8,%rdx 1428 jz L$ctr32_done 1429 leaq -128(%rcx),%rcx 1430 1431L$ctr32_tail: 1432 1433 1434 leaq 16(%rcx),%rcx 1435 cmpq $4,%rdx 1436 jb L$ctr32_loop3 1437 je L$ctr32_loop4 1438 1439 1440 shll $4,%eax 1441 movdqa 96(%rsp),%xmm8 1442 pxor %xmm9,%xmm9 1443 1444 movups 16(%rcx),%xmm0 1445.byte 102,15,56,220,209 1446.byte 102,15,56,220,217 1447 leaq 32-16(%rcx,%rax,1),%rcx 1448 negq %rax 1449.byte 102,15,56,220,225 1450 addq $16,%rax 1451 movups (%rdi),%xmm10 1452.byte 102,15,56,220,233 1453.byte 102,15,56,220,241 1454 movups 16(%rdi),%xmm11 1455 movups 32(%rdi),%xmm12 1456.byte 102,15,56,220,249 1457.byte 102,68,15,56,220,193 1458 1459 call L$enc_loop8_enter 1460 1461 movdqu 48(%rdi),%xmm13 1462 pxor %xmm10,%xmm2 1463 movdqu 64(%rdi),%xmm10 1464 pxor %xmm11,%xmm3 1465 movdqu %xmm2,(%rsi) 1466 pxor %xmm12,%xmm4 1467 movdqu %xmm3,16(%rsi) 1468 pxor %xmm13,%xmm5 1469 movdqu %xmm4,32(%rsi) 1470 pxor %xmm10,%xmm6 1471 movdqu %xmm5,48(%rsi) 1472 movdqu %xmm6,64(%rsi) 1473 cmpq $6,%rdx 1474 jb L$ctr32_done 1475 1476 movups 80(%rdi),%xmm11 1477 xorps %xmm11,%xmm7 1478 movups %xmm7,80(%rsi) 1479 je L$ctr32_done 1480 1481 movups 96(%rdi),%xmm12 1482 xorps %xmm12,%xmm8 1483 movups %xmm8,96(%rsi) 1484 jmp L$ctr32_done 1485 1486.p2align 5 1487L$ctr32_loop4: 1488.byte 102,15,56,220,209 1489 leaq 16(%rcx),%rcx 1490 decl %eax 1491.byte 102,15,56,220,217 1492.byte 102,15,56,220,225 1493.byte 102,15,56,220,233 1494 movups (%rcx),%xmm1 1495 jnz L$ctr32_loop4 1496.byte 102,15,56,221,209 1497.byte 102,15,56,221,217 1498 movups (%rdi),%xmm10 1499 movups 
16(%rdi),%xmm11
.byte	102,15,56,221,225
.byte	102,15,56,221,233
	movups	32(%rdi),%xmm12
	movups	48(%rdi),%xmm13

	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm5,48(%rsi)
	jmp	L$ctr32_done

.p2align	5
L$ctr32_loop3:
.byte	102,15,56,220,209
	leaq	16(%rcx),%rcx
	decl	%eax
.byte	102,15,56,220,217
.byte	102,15,56,220,225
	movups	(%rcx),%xmm1
	jnz	L$ctr32_loop3
.byte	102,15,56,221,209
.byte	102,15,56,221,217
.byte	102,15,56,221,225

	movups	(%rdi),%xmm10
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)
	cmpq	$2,%rdx
	jb	L$ctr32_done

	movups	16(%rdi),%xmm11
	xorps	%xmm11,%xmm3
	movups	%xmm3,16(%rsi)
	je	L$ctr32_done

	movups	32(%rdi),%xmm12
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

L$ctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%r11d,%r11d
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
L$ctr32_epilogue:
	.byte	0xf3,0xc3

/*
 * _aesni_xts_encrypt
 *
 * Registers on entry, as they are consumed by the code below:
 *   %rdi  input pointer  (blocks are loaded from it; it is advanced)
 *   %rsi  output pointer (blocks are stored to it; it is advanced)
 *   %rdx  length in bytes; the low four bits are the partial-block tail
 *   %rcx  key schedule applied to the data blocks (loop count at +240)
 *   %r8   key schedule used once, to encrypt the initial tweak block
 *   %r9   pointer to the 16-byte initial tweak block
 * NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv)
 * under the System V AMD64 ABI - confirm against the C declaration.
 *
 * The .byte sequences are hand-encoded AES-NI instructions:
 *   102,15,56,220,M  = aesenc      102,15,56,221,M  = aesenclast
 * M is the ModRM byte (209 = source %xmm1, destination %xmm2, and so on).
 * The 221 forms followed by "36,disp" read their source from disp(%rsp).
 *
 * Stack frame: 112 bytes of 16-byte aligned scratch below the saved %rbp.
 *   0..80(%rsp)  six per-block values tweak[i] ^ round-key[last]
 *   96(%rsp)     round-key[0] ^ round-key[last]
 * %rbp is set to the address of the saved %rbp so the epilogue can restore
 * %rsp with a single lea regardless of the alignment adjustment.
 */
.globl	_aesni_xts_encrypt
.private_extern	_aesni_xts_encrypt

.p2align	4
_aesni_xts_encrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp
	/* Encrypt the block at (%r9) with the key schedule at %r8: xmm2 = initial tweak. */
	movups	(%r9),%xmm2
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
L$oop_enc1_8:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	L$oop_enc1_8
.byte	102,15,56,221,209
	/*
	 * From here on: xmm0 = round-key[0] of the data key, %r11 = data key
	 * base, %eax = loop count, %r10 = 16 * loop count, %r9 = original
	 * length (its low four bits are tested at L$xts_enc_done), %rdx = length
	 * rounded down to whole blocks.
	 */
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

	/*
	 * Derive the first six tweaks.  Each step doubles both 64-bit halves
	 * of xmm15 (paddq) and xors in a correction built from the sign bits
	 * tracked in xmm9 (psrad $31) masked with the constant loaded from
	 * L$xts_magic (defined outside this chunk).  xmm10..xmm14 receive
	 * tweak[0..4] already xored with round-key[0]; xmm15 is left holding
	 * the raw sixth tweak.  xmm1 becomes round-key[0] ^ round-key[last].
	 */
	movdqa	L$xts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$95,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)

	/* Fewer than six whole blocks: go straight to the short path. */
	subq	$96,%rdx
	jc	L$xts_enc_short

	/*
	 * Grand-loop setup: %rax = %r10 = 112 - 16*count is the running key
	 * offset used by L$xts_enc_loop6, %rcx = key + 32 + 16*count,
	 * xmm1 = round-key[1], %r8 = address of L$xts_magic.
	 */
	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10
	leaq	L$xts_magic(%rip),%r8
	jmp	L$xts_enc_grandloop

/*
 * Six blocks (96 bytes) per iteration.  Loading, tweak whitening and the
 * first AES rounds are interleaved.  Because xmm10..xmm14 already contain
 * round-key[0], a single pxor applies both the tweak and the first key.
 * Each tweak is then xored with 96(%rsp) and parked on the stack so it can
 * serve as the memory operand of the final aesenclast.
 */
.p2align	5
L$xts_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,220,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,220,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,220,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,220,233
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,220,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,249
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,220,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$95,%xmm15,%xmm9
	jmp	L$xts_enc_loop6
/*
 * Two rounds per pass over all six blocks.  %rax advances by 32 and the
 * loop ends when it reaches zero; the flags tested by jnz come from the
 * addq, which the following SSE instructions do not modify.
 */
.p2align	5
L$xts_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	L$xts_enc_loop6

	/*
	 * Remaining rounds, interleaved with computing the next six tweaks:
	 * xmm10..xmm14 are rebuilt as round-key[0] ^ tweak, xmm15 ends up as
	 * the raw sixth tweak, and xmm0/xmm1 are reloaded with round keys 0/1
	 * for the next iteration.
	 */
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,220,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,220,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,220,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,220,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,220,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%r11),%xmm1

	/* aesenclast with the stacked tweak ^ last-key values as the round key. */
	pxor	%xmm15,%xmm14
.byte	102,15,56,221,84,36,0
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,92,36,16
.byte	102,15,56,221,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,221,108,36,48
.byte	102,15,56,221,116,36,64
.byte	102,15,56,221,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	L$xts_enc_grandloop

	/* Recover the loop count ((112 - %r10) / 16) and the key base pointer. */
	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

/*
 * 0..5 whole blocks remain (%rdx + 96 bytes).  The round-key[0] that was
 * folded into xmm10..xmm14 is xored back out as each tweak is needed; the
 * SSE pxor instructions sit between cmpq and the conditional jumps because
 * they leave the flags untouched.  %r10d keeps the loop count for later.
 */
L$xts_enc_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	addq	$96,%rdx
	jz	L$xts_enc_done

	pxor	%xmm0,%xmm11
	cmpq	$32,%rdx
	jb	L$xts_enc_one
	pxor	%xmm0,%xmm12
	je	L$xts_enc_two

	pxor	%xmm0,%xmm13
	cmpq	$64,%rdx
	jb	L$xts_enc_three
	pxor	%xmm0,%xmm14
	je	L$xts_enc_four

	/* Five blocks: run the six-block helper with a zeroed sixth lane. */
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm7

	call	_aesni_encrypt6

	/* In every short case xmm10 is left holding the next unused tweak. */
	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	L$xts_enc_done

.p2align	4
L$xts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_9:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_9
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	L$xts_enc_done

.p2align	4
L$xts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	L$xts_enc_done

.p2align	4
L$xts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	L$xts_enc_done

.p2align	4
L$xts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	L$xts_enc_done

/* All whole blocks are written; %r9 & 15 is the number of leftover bytes. */
.p2align	4
L$xts_enc_done:
	andq	$15,%r9
	jz	L$xts_enc_ret
	movq	%r9,%rdx

/*
 * Byte-swap loop for the partial tail: each remaining input byte replaces
 * the corresponding byte of the last block already written at -16(%rsi),
 * and the byte it displaces is appended to the output at 0(%rsi).
 */
L$xts_enc_steal:
	movzbl	(%rdi),%eax
	movzbl	-16(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	L$xts_enc_steal

	/* Rewind %rsi and encrypt the patched block in place with tweak xmm10. */
	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_enc1_10:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_enc1_10
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

/* Clear xmm0..xmm15 and the seven 16-byte scratch slots, then unwind. */
L$xts_enc_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
L$xts_enc_epilogue:
	.byte	0xf3,0xc3

/*
 * _aesni_xts_decrypt
 *
 * Same register interface and stack layout as the encrypt routine:
 *   %rdi  input pointer          %rsi  output pointer
 *   %rdx  length in bytes        %rcx  key schedule for the data blocks
 *   %r8   key schedule used to encrypt the initial tweak block
 *   %r9   pointer to the 16-byte initial tweak block
 * NOTE(review): presumably the C prototype is (in, out, len, key1, key2, iv)
 * under the System V AMD64 ABI - confirm against the C declaration.
 *
 * Hand-encoded AES-NI instructions used here:
 *   102,15,56,220,M = aesenc      102,15,56,221,M = aesenclast
 *   102,15,56,222,M = aesdec      102,15,56,223,M = aesdeclast
 * The tweak is still produced with aesenc/aesenclast; only the data
 * blocks go through aesdec/aesdeclast.
 *
 * Difference from encryption: when the length is not a multiple of 16, one
 * extra whole block is held back so that the last full block and the
 * partial tail can be processed together at L$xts_dec_done2, using two
 * consecutive tweaks (xmm11 first, then xmm10).
 */
.globl	_aesni_xts_decrypt
.private_extern	_aesni_xts_decrypt

.p2align	4
_aesni_xts_decrypt:
	leaq	(%rsp),%rax
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	leaq	-8(%rax),%rbp
	/* Encrypt the block at (%r9) with the key schedule at %r8: xmm2 = initial tweak. */
	movups	(%r9),%xmm2
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
L$oop_enc1_11:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	L$oop_enc1_11
.byte	102,15,56,221,209
	/* If (%rdx & 15) != 0, subtract 16 from the length to reserve a block. */
	xorl	%eax,%eax
	testq	$15,%rdx
	setnz	%al
	shlq	$4,%rax
	subq	%rax,%rdx

	/*
	 * xmm0 = round-key[0], %r11 = data key base, %eax = loop count,
	 * %r10 = 16 * loop count, %r9 = adjusted length (low four bits are
	 * still the tail size), %rdx = adjusted length in whole blocks.
	 */
	movups	(%rcx),%xmm0
	movq	%rcx,%r11
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

	/*
	 * Derive the first six tweaks exactly as in the encrypt routine:
	 * xmm10..xmm14 = tweak[0..4] ^ round-key[0], xmm15 = raw sixth tweak,
	 * xmm1 = round-key[0] ^ round-key[last] (saved at 96(%rsp)).
	 */
	movdqa	L$xts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$95,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)

	/* Fewer than six whole blocks: go straight to the short path. */
	subq	$96,%rdx
	jc	L$xts_dec_short

	/*
	 * Grand-loop setup: %rax = %r10 = 112 - 16*count is the running key
	 * offset used by L$xts_dec_loop6, %rcx = key + 32 + 16*count,
	 * xmm1 = round-key[1], %r8 = address of L$xts_magic.
	 */
	movl	$16+96,%eax
	leaq	32(%r11,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%r11),%xmm1
	movq	%rax,%r10
	leaq	L$xts_magic(%rip),%r8
	jmp	L$xts_dec_grandloop

/*
 * Six blocks (96 bytes) per iteration; same schedule as the encrypt grand
 * loop with aesdec/aesdeclast in place of aesenc/aesenclast.
 */
.p2align	5
L$xts_dec_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,222,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,222,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,222,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,222,233
	movups	32(%r11),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,222,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,222,249
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,222,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,222,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	64(%r11),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$95,%xmm15,%xmm9
	jmp	L$xts_dec_loop6
/* Two rounds per pass; exits when the addq brings %rax to zero. */
.p2align	5
L$xts_dec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	L$xts_dec_loop6

	/* Remaining rounds, interleaved with computing the next six tweaks. */
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	pand	%xmm8,%xmm14
	movups	(%r11),%xmm10
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,222,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,222,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,222,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,222,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,222,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm15
	movups	(%r11),%xmm0
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	16(%r11),%xmm1

	/* aesdeclast with the stacked tweak ^ last-key values as the round key. */
	pxor	%xmm15,%xmm14
.byte	102,15,56,223,84,36,0
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,223,92,36,16
.byte	102,15,56,223,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,223,108,36,48
.byte	102,15,56,223,116,36,64
.byte	102,15,56,223,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	L$xts_dec_grandloop

	/* Recover the loop count ((112 - %r10) / 16) and the key base pointer. */
	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%r11,%rcx
	shrl	$4,%eax

/*
 * 0..5 whole blocks remain.  Unlike the encrypt path, xmm10 and xmm11 are
 * both stripped of round-key[0] before the zero-length test, because the
 * tail handling at L$xts_dec_done2 consumes two tweaks.
 */
L$xts_dec_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	pxor	%xmm0,%xmm11
	addq	$96,%rdx
	jz	L$xts_dec_done

	pxor	%xmm0,%xmm12
	cmpq	$32,%rdx
	jb	L$xts_dec_one
	pxor	%xmm0,%xmm13
	je	L$xts_dec_two

	pxor	%xmm0,%xmm14
	cmpq	$64,%rdx
	jb	L$xts_dec_three
	je	L$xts_dec_four

	/* Five blocks. */
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6

	call	_aesni_decrypt6

	/*
	 * Store the five results.  Interleaved with the stores, a sign mask
	 * of xmm15 is built (pcmpgtd against zero, then pshufd $19) so that,
	 * if a tail exists, xmm11 can be set to the tweak following xmm15
	 * while xmm10 keeps xmm15 itself.
	 */
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm14
	movdqu	%xmm5,48(%rsi)
	pcmpgtd	%xmm15,%xmm14
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	pshufd	$19,%xmm14,%xmm11
	andq	$15,%r9
	jz	L$xts_dec_ret

	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm11
	pxor	%xmm15,%xmm11
	jmp	L$xts_dec_done2

/* In each case below xmm10/xmm11 are left as the next two unused tweaks. */
.p2align	4
L$xts_dec_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_12:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_12
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	movdqa	%xmm12,%xmm11
	leaq	16(%rsi),%rsi
	jmp	L$xts_dec_done

.p2align	4
L$xts_dec_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_decrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm13,%xmm11
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	L$xts_dec_done

.p2align	4
L$xts_dec_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_decrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm14,%xmm11
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	L$xts_dec_done

.p2align	4
L$xts_dec_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_decrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqa	%xmm15,%xmm11
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	L$xts_dec_done

/* %r9 & 15 == 0 means there is no partial tail and nothing was held back. */
.p2align	4
L$xts_dec_done:
	andq	$15,%r9
	jz	L$xts_dec_ret
/*
 * Tail handling.  First decrypt the held-back whole block at (%rdi) with
 * the later tweak (xmm11) and store it at (%rsi).
 */
L$xts_dec_done2:
	movq	%r9,%rdx
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	(%rdi),%xmm2
	xorps	%xmm11,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_13:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_13
.byte	102,15,56,223,209
	xorps	%xmm11,%xmm2
	movups	%xmm2,(%rsi)

/*
 * Byte-swap loop: each partial-tail input byte at 16(%rdi) replaces the
 * corresponding byte of the block just written at (%rsi), and the byte it
 * displaces becomes tail output at 16(%rsi).
 */
L$xts_dec_steal:
	movzbl	16(%rdi),%eax
	movzbl	(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,(%rsi)
	movb	%cl,16(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	L$xts_dec_steal

	/* Rewind %rsi and decrypt the patched block in place with tweak xmm10. */
	subq	%r9,%rsi
	movq	%r11,%rcx
	movl	%r10d,%eax

	movups	(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
L$oop_dec1_14:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	L$oop_dec1_14
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)

/* Clear xmm0..xmm15 and the seven 16-byte scratch slots, then unwind. */
L$xts_dec_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	(%rbp),%rsp
	popq	%rbp
L$xts_dec_epilogue:
	.byte	0xf3,0xc3

.globl	_aesni_cbc_encrypt
.private_extern	_aesni_cbc_encrypt

.p2align	4
_aesni_cbc_encrypt:
	testq
%rdx,%rdx 2549 jz L$cbc_ret 2550 2551 movl 240(%rcx),%r10d 2552 movq %rcx,%r11 2553 testl %r9d,%r9d 2554 jz L$cbc_decrypt 2555 2556 movups (%r8),%xmm2 2557 movl %r10d,%eax 2558 cmpq $16,%rdx 2559 jb L$cbc_enc_tail 2560 subq $16,%rdx 2561 jmp L$cbc_enc_loop 2562.p2align 4 2563L$cbc_enc_loop: 2564 movups (%rdi),%xmm3 2565 leaq 16(%rdi),%rdi 2566 2567 movups (%rcx),%xmm0 2568 movups 16(%rcx),%xmm1 2569 xorps %xmm0,%xmm3 2570 leaq 32(%rcx),%rcx 2571 xorps %xmm3,%xmm2 2572L$oop_enc1_15: 2573.byte 102,15,56,220,209 2574 decl %eax 2575 movups (%rcx),%xmm1 2576 leaq 16(%rcx),%rcx 2577 jnz L$oop_enc1_15 2578.byte 102,15,56,221,209 2579 movl %r10d,%eax 2580 movq %r11,%rcx 2581 movups %xmm2,0(%rsi) 2582 leaq 16(%rsi),%rsi 2583 subq $16,%rdx 2584 jnc L$cbc_enc_loop 2585 addq $16,%rdx 2586 jnz L$cbc_enc_tail 2587 pxor %xmm0,%xmm0 2588 pxor %xmm1,%xmm1 2589 movups %xmm2,(%r8) 2590 pxor %xmm2,%xmm2 2591 pxor %xmm3,%xmm3 2592 jmp L$cbc_ret 2593 2594L$cbc_enc_tail: 2595 movq %rdx,%rcx 2596 xchgq %rdi,%rsi 2597.long 0x9066A4F3 2598 movl $16,%ecx 2599 subq %rdx,%rcx 2600 xorl %eax,%eax 2601.long 0x9066AAF3 2602 leaq -16(%rdi),%rdi 2603 movl %r10d,%eax 2604 movq %rdi,%rsi 2605 movq %r11,%rcx 2606 xorq %rdx,%rdx 2607 jmp L$cbc_enc_loop 2608 2609.p2align 4 2610L$cbc_decrypt: 2611 cmpq $16,%rdx 2612 jne L$cbc_decrypt_bulk 2613 2614 2615 2616 movdqu (%rdi),%xmm2 2617 movdqu (%r8),%xmm3 2618 movdqa %xmm2,%xmm4 2619 movups (%rcx),%xmm0 2620 movups 16(%rcx),%xmm1 2621 leaq 32(%rcx),%rcx 2622 xorps %xmm0,%xmm2 2623L$oop_dec1_16: 2624.byte 102,15,56,222,209 2625 decl %r10d 2626 movups (%rcx),%xmm1 2627 leaq 16(%rcx),%rcx 2628 jnz L$oop_dec1_16 2629.byte 102,15,56,223,209 2630 pxor %xmm0,%xmm0 2631 pxor %xmm1,%xmm1 2632 movdqu %xmm4,(%r8) 2633 xorps %xmm3,%xmm2 2634 pxor %xmm3,%xmm3 2635 movups %xmm2,(%rsi) 2636 pxor %xmm2,%xmm2 2637 jmp L$cbc_ret 2638.p2align 4 2639L$cbc_decrypt_bulk: 2640 leaq (%rsp),%rax 2641 pushq %rbp 2642 subq $16,%rsp 2643 andq $-16,%rsp 2644 leaq -8(%rax),%rbp 2645 
movups (%r8),%xmm10 2646 movl %r10d,%eax 2647 cmpq $80,%rdx 2648 jbe L$cbc_dec_tail 2649 2650 movups (%rcx),%xmm0 2651 movdqu 0(%rdi),%xmm2 2652 movdqu 16(%rdi),%xmm3 2653 movdqa %xmm2,%xmm11 2654 movdqu 32(%rdi),%xmm4 2655 movdqa %xmm3,%xmm12 2656 movdqu 48(%rdi),%xmm5 2657 movdqa %xmm4,%xmm13 2658 movdqu 64(%rdi),%xmm6 2659 movdqa %xmm5,%xmm14 2660 movdqu 80(%rdi),%xmm7 2661 movdqa %xmm6,%xmm15 2662 movl _OPENSSL_ia32cap_P+4(%rip),%r9d 2663 cmpq $112,%rdx 2664 jbe L$cbc_dec_six_or_seven 2665 2666 andl $71303168,%r9d 2667 subq $80,%rdx 2668 cmpl $4194304,%r9d 2669 je L$cbc_dec_loop6_enter 2670 subq $32,%rdx 2671 leaq 112(%rcx),%rcx 2672 jmp L$cbc_dec_loop8_enter 2673.p2align 4 2674L$cbc_dec_loop8: 2675 movups %xmm9,(%rsi) 2676 leaq 16(%rsi),%rsi 2677L$cbc_dec_loop8_enter: 2678 movdqu 96(%rdi),%xmm8 2679 pxor %xmm0,%xmm2 2680 movdqu 112(%rdi),%xmm9 2681 pxor %xmm0,%xmm3 2682 movups 16-112(%rcx),%xmm1 2683 pxor %xmm0,%xmm4 2684 xorq %r11,%r11 2685 cmpq $112,%rdx 2686 pxor %xmm0,%xmm5 2687 pxor %xmm0,%xmm6 2688 pxor %xmm0,%xmm7 2689 pxor %xmm0,%xmm8 2690 2691.byte 102,15,56,222,209 2692 pxor %xmm0,%xmm9 2693 movups 32-112(%rcx),%xmm0 2694.byte 102,15,56,222,217 2695.byte 102,15,56,222,225 2696.byte 102,15,56,222,233 2697.byte 102,15,56,222,241 2698.byte 102,15,56,222,249 2699.byte 102,68,15,56,222,193 2700 setnc %r11b 2701 shlq $7,%r11 2702.byte 102,68,15,56,222,201 2703 addq %rdi,%r11 2704 movups 48-112(%rcx),%xmm1 2705.byte 102,15,56,222,208 2706.byte 102,15,56,222,216 2707.byte 102,15,56,222,224 2708.byte 102,15,56,222,232 2709.byte 102,15,56,222,240 2710.byte 102,15,56,222,248 2711.byte 102,68,15,56,222,192 2712.byte 102,68,15,56,222,200 2713 movups 64-112(%rcx),%xmm0 2714 nop 2715.byte 102,15,56,222,209 2716.byte 102,15,56,222,217 2717.byte 102,15,56,222,225 2718.byte 102,15,56,222,233 2719.byte 102,15,56,222,241 2720.byte 102,15,56,222,249 2721.byte 102,68,15,56,222,193 2722.byte 102,68,15,56,222,201 2723 movups 80-112(%rcx),%xmm1 2724 nop 2725.byte 
102,15,56,222,208 2726.byte 102,15,56,222,216 2727.byte 102,15,56,222,224 2728.byte 102,15,56,222,232 2729.byte 102,15,56,222,240 2730.byte 102,15,56,222,248 2731.byte 102,68,15,56,222,192 2732.byte 102,68,15,56,222,200 2733 movups 96-112(%rcx),%xmm0 2734 nop 2735.byte 102,15,56,222,209 2736.byte 102,15,56,222,217 2737.byte 102,15,56,222,225 2738.byte 102,15,56,222,233 2739.byte 102,15,56,222,241 2740.byte 102,15,56,222,249 2741.byte 102,68,15,56,222,193 2742.byte 102,68,15,56,222,201 2743 movups 112-112(%rcx),%xmm1 2744 nop 2745.byte 102,15,56,222,208 2746.byte 102,15,56,222,216 2747.byte 102,15,56,222,224 2748.byte 102,15,56,222,232 2749.byte 102,15,56,222,240 2750.byte 102,15,56,222,248 2751.byte 102,68,15,56,222,192 2752.byte 102,68,15,56,222,200 2753 movups 128-112(%rcx),%xmm0 2754 nop 2755.byte 102,15,56,222,209 2756.byte 102,15,56,222,217 2757.byte 102,15,56,222,225 2758.byte 102,15,56,222,233 2759.byte 102,15,56,222,241 2760.byte 102,15,56,222,249 2761.byte 102,68,15,56,222,193 2762.byte 102,68,15,56,222,201 2763 movups 144-112(%rcx),%xmm1 2764 cmpl $11,%eax 2765.byte 102,15,56,222,208 2766.byte 102,15,56,222,216 2767.byte 102,15,56,222,224 2768.byte 102,15,56,222,232 2769.byte 102,15,56,222,240 2770.byte 102,15,56,222,248 2771.byte 102,68,15,56,222,192 2772.byte 102,68,15,56,222,200 2773 movups 160-112(%rcx),%xmm0 2774 jb L$cbc_dec_done 2775.byte 102,15,56,222,209 2776.byte 102,15,56,222,217 2777.byte 102,15,56,222,225 2778.byte 102,15,56,222,233 2779.byte 102,15,56,222,241 2780.byte 102,15,56,222,249 2781.byte 102,68,15,56,222,193 2782.byte 102,68,15,56,222,201 2783 movups 176-112(%rcx),%xmm1 2784 nop 2785.byte 102,15,56,222,208 2786.byte 102,15,56,222,216 2787.byte 102,15,56,222,224 2788.byte 102,15,56,222,232 2789.byte 102,15,56,222,240 2790.byte 102,15,56,222,248 2791.byte 102,68,15,56,222,192 2792.byte 102,68,15,56,222,200 2793 movups 192-112(%rcx),%xmm0 2794 je L$cbc_dec_done 2795.byte 102,15,56,222,209 2796.byte 102,15,56,222,217 2797.byte 
102,15,56,222,225 2798.byte 102,15,56,222,233 2799.byte 102,15,56,222,241 2800.byte 102,15,56,222,249 2801.byte 102,68,15,56,222,193 2802.byte 102,68,15,56,222,201 2803 movups 208-112(%rcx),%xmm1 2804 nop 2805.byte 102,15,56,222,208 2806.byte 102,15,56,222,216 2807.byte 102,15,56,222,224 2808.byte 102,15,56,222,232 2809.byte 102,15,56,222,240 2810.byte 102,15,56,222,248 2811.byte 102,68,15,56,222,192 2812.byte 102,68,15,56,222,200 2813 movups 224-112(%rcx),%xmm0 2814 jmp L$cbc_dec_done 2815.p2align 4 2816L$cbc_dec_done: 2817.byte 102,15,56,222,209 2818.byte 102,15,56,222,217 2819 pxor %xmm0,%xmm10 2820 pxor %xmm0,%xmm11 2821.byte 102,15,56,222,225 2822.byte 102,15,56,222,233 2823 pxor %xmm0,%xmm12 2824 pxor %xmm0,%xmm13 2825.byte 102,15,56,222,241 2826.byte 102,15,56,222,249 2827 pxor %xmm0,%xmm14 2828 pxor %xmm0,%xmm15 2829.byte 102,68,15,56,222,193 2830.byte 102,68,15,56,222,201 2831 movdqu 80(%rdi),%xmm1 2832 2833.byte 102,65,15,56,223,210 2834 movdqu 96(%rdi),%xmm10 2835 pxor %xmm0,%xmm1 2836.byte 102,65,15,56,223,219 2837 pxor %xmm0,%xmm10 2838 movdqu 112(%rdi),%xmm0 2839.byte 102,65,15,56,223,228 2840 leaq 128(%rdi),%rdi 2841 movdqu 0(%r11),%xmm11 2842.byte 102,65,15,56,223,237 2843.byte 102,65,15,56,223,246 2844 movdqu 16(%r11),%xmm12 2845 movdqu 32(%r11),%xmm13 2846.byte 102,65,15,56,223,255 2847.byte 102,68,15,56,223,193 2848 movdqu 48(%r11),%xmm14 2849 movdqu 64(%r11),%xmm15 2850.byte 102,69,15,56,223,202 2851 movdqa %xmm0,%xmm10 2852 movdqu 80(%r11),%xmm1 2853 movups -112(%rcx),%xmm0 2854 2855 movups %xmm2,(%rsi) 2856 movdqa %xmm11,%xmm2 2857 movups %xmm3,16(%rsi) 2858 movdqa %xmm12,%xmm3 2859 movups %xmm4,32(%rsi) 2860 movdqa %xmm13,%xmm4 2861 movups %xmm5,48(%rsi) 2862 movdqa %xmm14,%xmm5 2863 movups %xmm6,64(%rsi) 2864 movdqa %xmm15,%xmm6 2865 movups %xmm7,80(%rsi) 2866 movdqa %xmm1,%xmm7 2867 movups %xmm8,96(%rsi) 2868 leaq 112(%rsi),%rsi 2869 2870 subq $128,%rdx 2871 ja L$cbc_dec_loop8 2872 2873 movaps %xmm9,%xmm2 2874 leaq -112(%rcx),%rcx 2875 addq 
$112,%rdx 2876 jle L$cbc_dec_clear_tail_collected 2877 movups %xmm9,(%rsi) 2878 leaq 16(%rsi),%rsi 2879 cmpq $80,%rdx 2880 jbe L$cbc_dec_tail 2881 2882 movaps %xmm11,%xmm2 2883L$cbc_dec_six_or_seven: 2884 cmpq $96,%rdx 2885 ja L$cbc_dec_seven 2886 2887 movaps %xmm7,%xmm8 2888 call _aesni_decrypt6 2889 pxor %xmm10,%xmm2 2890 movaps %xmm8,%xmm10 2891 pxor %xmm11,%xmm3 2892 movdqu %xmm2,(%rsi) 2893 pxor %xmm12,%xmm4 2894 movdqu %xmm3,16(%rsi) 2895 pxor %xmm3,%xmm3 2896 pxor %xmm13,%xmm5 2897 movdqu %xmm4,32(%rsi) 2898 pxor %xmm4,%xmm4 2899 pxor %xmm14,%xmm6 2900 movdqu %xmm5,48(%rsi) 2901 pxor %xmm5,%xmm5 2902 pxor %xmm15,%xmm7 2903 movdqu %xmm6,64(%rsi) 2904 pxor %xmm6,%xmm6 2905 leaq 80(%rsi),%rsi 2906 movdqa %xmm7,%xmm2 2907 pxor %xmm7,%xmm7 2908 jmp L$cbc_dec_tail_collected 2909 2910.p2align 4 2911L$cbc_dec_seven: 2912 movups 96(%rdi),%xmm8 2913 xorps %xmm9,%xmm9 2914 call _aesni_decrypt8 2915 movups 80(%rdi),%xmm9 2916 pxor %xmm10,%xmm2 2917 movups 96(%rdi),%xmm10 2918 pxor %xmm11,%xmm3 2919 movdqu %xmm2,(%rsi) 2920 pxor %xmm12,%xmm4 2921 movdqu %xmm3,16(%rsi) 2922 pxor %xmm3,%xmm3 2923 pxor %xmm13,%xmm5 2924 movdqu %xmm4,32(%rsi) 2925 pxor %xmm4,%xmm4 2926 pxor %xmm14,%xmm6 2927 movdqu %xmm5,48(%rsi) 2928 pxor %xmm5,%xmm5 2929 pxor %xmm15,%xmm7 2930 movdqu %xmm6,64(%rsi) 2931 pxor %xmm6,%xmm6 2932 pxor %xmm9,%xmm8 2933 movdqu %xmm7,80(%rsi) 2934 pxor %xmm7,%xmm7 2935 leaq 96(%rsi),%rsi 2936 movdqa %xmm8,%xmm2 2937 pxor %xmm8,%xmm8 2938 pxor %xmm9,%xmm9 2939 jmp L$cbc_dec_tail_collected 2940 2941.p2align 4 2942L$cbc_dec_loop6: 2943 movups %xmm7,(%rsi) 2944 leaq 16(%rsi),%rsi 2945 movdqu 0(%rdi),%xmm2 2946 movdqu 16(%rdi),%xmm3 2947 movdqa %xmm2,%xmm11 2948 movdqu 32(%rdi),%xmm4 2949 movdqa %xmm3,%xmm12 2950 movdqu 48(%rdi),%xmm5 2951 movdqa %xmm4,%xmm13 2952 movdqu 64(%rdi),%xmm6 2953 movdqa %xmm5,%xmm14 2954 movdqu 80(%rdi),%xmm7 2955 movdqa %xmm6,%xmm15 2956L$cbc_dec_loop6_enter: 2957 leaq 96(%rdi),%rdi 2958 movdqa %xmm7,%xmm8 2959 2960 call _aesni_decrypt6 
2961 2962 pxor %xmm10,%xmm2 2963 movdqa %xmm8,%xmm10 2964 pxor %xmm11,%xmm3 2965 movdqu %xmm2,(%rsi) 2966 pxor %xmm12,%xmm4 2967 movdqu %xmm3,16(%rsi) 2968 pxor %xmm13,%xmm5 2969 movdqu %xmm4,32(%rsi) 2970 pxor %xmm14,%xmm6 2971 movq %r11,%rcx 2972 movdqu %xmm5,48(%rsi) 2973 pxor %xmm15,%xmm7 2974 movl %r10d,%eax 2975 movdqu %xmm6,64(%rsi) 2976 leaq 80(%rsi),%rsi 2977 subq $96,%rdx 2978 ja L$cbc_dec_loop6 2979 2980 movdqa %xmm7,%xmm2 2981 addq $80,%rdx 2982 jle L$cbc_dec_clear_tail_collected 2983 movups %xmm7,(%rsi) 2984 leaq 16(%rsi),%rsi 2985 2986L$cbc_dec_tail: 2987 movups (%rdi),%xmm2 2988 subq $16,%rdx 2989 jbe L$cbc_dec_one 2990 2991 movups 16(%rdi),%xmm3 2992 movaps %xmm2,%xmm11 2993 subq $16,%rdx 2994 jbe L$cbc_dec_two 2995 2996 movups 32(%rdi),%xmm4 2997 movaps %xmm3,%xmm12 2998 subq $16,%rdx 2999 jbe L$cbc_dec_three 3000 3001 movups 48(%rdi),%xmm5 3002 movaps %xmm4,%xmm13 3003 subq $16,%rdx 3004 jbe L$cbc_dec_four 3005 3006 movups 64(%rdi),%xmm6 3007 movaps %xmm5,%xmm14 3008 movaps %xmm6,%xmm15 3009 xorps %xmm7,%xmm7 3010 call _aesni_decrypt6 3011 pxor %xmm10,%xmm2 3012 movaps %xmm15,%xmm10 3013 pxor %xmm11,%xmm3 3014 movdqu %xmm2,(%rsi) 3015 pxor %xmm12,%xmm4 3016 movdqu %xmm3,16(%rsi) 3017 pxor %xmm3,%xmm3 3018 pxor %xmm13,%xmm5 3019 movdqu %xmm4,32(%rsi) 3020 pxor %xmm4,%xmm4 3021 pxor %xmm14,%xmm6 3022 movdqu %xmm5,48(%rsi) 3023 pxor %xmm5,%xmm5 3024 leaq 64(%rsi),%rsi 3025 movdqa %xmm6,%xmm2 3026 pxor %xmm6,%xmm6 3027 pxor %xmm7,%xmm7 3028 subq $16,%rdx 3029 jmp L$cbc_dec_tail_collected 3030 3031.p2align 4 3032L$cbc_dec_one: 3033 movaps %xmm2,%xmm11 3034 movups (%rcx),%xmm0 3035 movups 16(%rcx),%xmm1 3036 leaq 32(%rcx),%rcx 3037 xorps %xmm0,%xmm2 3038L$oop_dec1_17: 3039.byte 102,15,56,222,209 3040 decl %eax 3041 movups (%rcx),%xmm1 3042 leaq 16(%rcx),%rcx 3043 jnz L$oop_dec1_17 3044.byte 102,15,56,223,209 3045 xorps %xmm10,%xmm2 3046 movaps %xmm11,%xmm10 3047 jmp L$cbc_dec_tail_collected 3048.p2align 4 3049L$cbc_dec_two: 3050 movaps %xmm3,%xmm12 3051 
// ---------------------------------------------------------------------------
// Remainder of the CBC-decrypt tail. The routine entry and the label
// L$cbc_dec_two (with its first instruction) precede this point.
// Visible register usage: xmm2..xmm5 hold the blocks being decrypted,
// xmm11..xmm14 hold copies of the ciphertext blocks, and xmm10 is XORed
// into the first decrypted block and finally stored through %r8.
// NOTE(review): xmm10 is presumably the CBC chaining value (IV); confirm
// against the function prologue, which is outside this section.
// ---------------------------------------------------------------------------
	call	_aesni_decrypt2
	pxor	%xmm10,%xmm2			// block 0 ^= xmm10
	movaps	%xmm12,%xmm10			// xmm10 = copy of the last ciphertext block
	pxor	%xmm11,%xmm3			// block 1 ^= ciphertext block 0
	movdqu	%xmm2,(%rsi)
	movdqa	%xmm3,%xmm2			// last block is left in xmm2 for the common tail
	pxor	%xmm3,%xmm3			// zero the register that held output data
	leaq	16(%rsi),%rsi
	jmp	L$cbc_dec_tail_collected
.p2align	4
L$cbc_dec_three:
	movaps	%xmm4,%xmm13
	call	_aesni_decrypt3
	pxor	%xmm10,%xmm2
	movaps	%xmm13,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	movdqa	%xmm4,%xmm2			// last block is left in xmm2 for the common tail
	pxor	%xmm4,%xmm4
	leaq	32(%rsi),%rsi
	jmp	L$cbc_dec_tail_collected
.p2align	4
L$cbc_dec_four:
	movaps	%xmm5,%xmm14
	call	_aesni_decrypt4
	pxor	%xmm10,%xmm2
	movaps	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqu	%xmm2,(%rsi)
	pxor	%xmm12,%xmm4
	movdqu	%xmm3,16(%rsi)
	pxor	%xmm3,%xmm3
	pxor	%xmm13,%xmm5
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm4,%xmm4
	movdqa	%xmm5,%xmm2			// last block is left in xmm2 for the common tail
	pxor	%xmm5,%xmm5
	leaq	48(%rsi),%rsi
	jmp	L$cbc_dec_tail_collected

.p2align	4
// Entry used by the wide loops: zero xmm3..xmm9 first, then fall through.
L$cbc_dec_clear_tail_collected:
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
// Common tail: xmm2 = final output block, xmm10 = value written back via %r8.
L$cbc_dec_tail_collected:
	movups	%xmm10,(%r8)
	andq	$15,%rdx			// rdx &= 15; non-zero selects the partial path
	jnz	L$cbc_dec_tail_partial
	movups	%xmm2,(%rsi)			// whole final block goes straight to the output
	pxor	%xmm2,%xmm2
	jmp	L$cbc_dec_ret
.p2align	4
// Partial path: spill the block to the stack slot at (%rsp), copy
// rcx = 16 - rdx bytes of it to the output with rep movsb, then overwrite
// the stack slot with the zeroed xmm2.
L$cbc_dec_tail_partial:
	movaps	%xmm2,(%rsp)
	pxor	%xmm2,%xmm2
	movq	$16,%rcx
	movq	%rsi,%rdi			// destination = output pointer
	subq	%rdx,%rcx			// rcx = 16 - (rdx & 15)
	leaq	(%rsp),%rsi			// source = stack slot
.long	0x9066A4F3				// bytes F3 A4 66 90: rep movsb ; 2-byte nop
	movdqa	%xmm2,(%rsp)			// clear the stack copy

L$cbc_dec_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	leaq	(%rbp),%rsp			// restore %rsp from %rbp (frame setup is outside this section)
	popq	%rbp
L$cbc_ret:
	.byte	0xf3,0xc3			// rep ret

// ---------------------------------------------------------------------------
// _aesni_set_decrypt_key
//
// In:   %rdi, %esi, %rdx are handed unchanged to __aesni_set_encrypt_key
//       (user key pointer, key size in bits, output key schedule).
// Out:  %eax = 0 on success; otherwise the non-zero status produced by
//       __aesni_set_encrypt_key is returned as is and the schedule is not
//       converted.
// Clobbers: %rdx, %rdi, %esi, %xmm0, %xmm1, flags, plus everything the
//       callee clobbers.
//
// Builds the encryption schedule, then converts it in place: the first and
// last round keys are swapped, and every round key in between is passed
// through aesimc while the order of the keys is reversed.
// ---------------------------------------------------------------------------
.globl	_aesni_set_decrypt_key
.private_extern _aesni_set_decrypt_key

.p2align	4
_aesni_set_decrypt_key:
.byte	0x48,0x83,0xEC,0x08			// sub $8,%rsp (undone by addq $8,%rsp at L$dec_key_ret)
	call	__aesni_set_encrypt_key
	shll	$4,%esi				// esi = 9/11/13 from the callee, times 16
	testl	%eax,%eax
	jnz	L$dec_key_ret			// key setup failed: return its status
	leaq	16(%rdx,%rsi,1),%rdi		// rdi = address of the last round key

	// Swap first and last round keys without transforming them.
	movups	(%rdx),%xmm0
	movups	(%rdi),%xmm1
	movups	%xmm0,(%rdi)
	movups	%xmm1,(%rdx)
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi

	// rdx walks up, rdi walks down; each pair is transformed and swapped.
L$dec_key_inverse:
	movups	(%rdx),%xmm0
	movups	(%rdi),%xmm1
.byte	102,15,56,219,192			// aesimc %xmm0,%xmm0
.byte	102,15,56,219,201			// aesimc %xmm1,%xmm1
	leaq	16(%rdx),%rdx
	leaq	-16(%rdi),%rdi
	movups	%xmm0,16(%rdi)
	movups	%xmm1,-16(%rdx)
	cmpq	%rdx,%rdi
	ja	L$dec_key_inverse		// unsigned: continue while rdi > rdx

	// Pointers have met on the middle round key: transform it in place.
	movups	(%rdx),%xmm0
.byte	102,15,56,219,192			// aesimc %xmm0,%xmm0
	pxor	%xmm1,%xmm1			// zero the key-material registers
	movups	%xmm0,(%rdi)
	pxor	%xmm0,%xmm0
L$dec_key_ret:
	addq	$8,%rsp
	.byte	0xf3,0xc3			// rep ret
L$SEH_end_set_decrypt_key:

// ---------------------------------------------------------------------------
// _aesni_set_encrypt_key / __aesni_set_encrypt_key
//
// In:   %rdi = user key; 16, 24 or 32 bytes are read depending on %esi
//       %esi = key size in bits: 128, 192 or 256
//       %rdx = output key schedule
// Out:  %rax = 0 on success
//              -1 if %rdi or %rdx is zero
//              -2 if %esi is not 128, 192 or 256
//       On success round keys are written starting at 0(%rdx) and the value
//       9, 11 or 13 is written to 240(%rdx); %esi holds that same value on
//       return (the decrypt-key wrapper above reads it).
// Clobbers: %r10, %xmm0-%xmm5 (all six are zeroed before returning), flags.
//
// Two code paths exist per key size. The default one uses aeskeygenassist
// plus the L$key_expansion_* helpers. The *_alt one uses pshufb and
// aesenclast with the L$key_rotate* and L$key_rcon* constants, and is taken
// when (word at _OPENSSL_ia32cap_P+4) & 0x10000800 == 0x10000000.
// NOTE(review): bits 28 and 11 of that word are presumably AVX and XOP in
// the OPENSSL_ia32cap layout; confirm against its documentation.
// ---------------------------------------------------------------------------
.globl	_aesni_set_encrypt_key
.private_extern _aesni_set_encrypt_key

.p2align	4
_aesni_set_encrypt_key:
__aesni_set_encrypt_key:
.byte	0x48,0x83,0xEC,0x08			// sub $8,%rsp (undone at L$enc_key_ret)
	movq	$-1,%rax			// status if either pointer check fails
	testq	%rdi,%rdi
	jz	L$enc_key_ret
	testq	%rdx,%rdx
	jz	L$enc_key_ret

	movl	$268437504,%r10d		// 0x10000800: bits 28 and 11
	movups	(%rdi),%xmm0			// first 16 key bytes
	xorps	%xmm4,%xmm4			// helpers expect xmm4 to start as zero
	andl	_OPENSSL_ia32cap_P+4(%rip),%r10d
	leaq	16(%rdx),%rax			// rax = write cursor, second round-key slot
	cmpl	$256,%esi
	je	L$14rounds
	cmpl	$192,%esi
	je	L$12rounds
	cmpl	$128,%esi
	jne	L$bad_keybits

// ---- 128-bit key ----------------------------------------------------------
L$10rounds:
	movl	$9,%esi				// value later stored at 240(%rdx)
	cmpl	$268435456,%r10d		// 0x10000000: bit 28 set, bit 11 clear
	je	L$10rounds_alt

	movups	%xmm0,(%rdx)			// round key 0 = user key
	// Each .byte line below is aeskeygenassist $imm,%xmm0,%xmm1; the
	// immediates are 1,2,4,8,16,32,64,128,27,54.
.byte	102,15,58,223,200,1
	call	L$key_expansion_128_cold
.byte	102,15,58,223,200,2
	call	L$key_expansion_128
.byte	102,15,58,223,200,4
	call	L$key_expansion_128
.byte	102,15,58,223,200,8
	call	L$key_expansion_128
.byte	102,15,58,223,200,16
	call	L$key_expansion_128
.byte	102,15,58,223,200,32
	call	L$key_expansion_128
.byte	102,15,58,223,200,64
	call	L$key_expansion_128
.byte	102,15,58,223,200,128
	call	L$key_expansion_128
.byte	102,15,58,223,200,27
	call	L$key_expansion_128
.byte	102,15,58,223,200,54
	call	L$key_expansion_128
	movups	%xmm0,(%rax)			// last round key (rax = rdx + 160 here)
	movl	%esi,80(%rax)			// lands at 240(%rdx)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

.p2align	4
L$10rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5	// pshufb mask
	movl	$8,%r10d			// loop count
	movdqa	L$key_rcon1(%rip),%xmm4		// round constant vector, starts at 1
	movdqa	%xmm0,%xmm2
	movdqu	%xmm0,(%rdx)			// round key 0 = user key
	jmp	L$oop_key128

.p2align	4
L$oop_key128:
.byte	102,15,56,0,197				// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196			// aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4			// double the round constant
	leaq	16(%rax),%rax

	// xmm2 = previous key XOR its copies shifted left by 4, 8, 12 bytes
	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,-16(%rax)
	movdqa	%xmm0,%xmm2

	decl	%r10d
	jnz	L$oop_key128

	// Two more round keys follow the loop: the first uses constant 0x1b
	// from L$key_rcon1b, the second uses that constant doubled by pslld.
	movdqa	L$key_rcon1b(%rip),%xmm4

.byte	102,15,56,0,197				// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196			// aesenclast %xmm4,%xmm0
	pslld	$1,%xmm4

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	movdqa	%xmm0,%xmm2
.byte	102,15,56,0,197				// pshufb %xmm5,%xmm0
.byte	102,15,56,221,196			// aesenclast %xmm4,%xmm0

	movdqa	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm2,%xmm3
	pslldq	$4,%xmm2
	pxor	%xmm3,%xmm2

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,16(%rax)

	movl	%esi,96(%rax)			// rax = rdx + 144 here, so this is 240(%rdx)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

// ---- 192-bit key ----------------------------------------------------------
.p2align	4
L$12rounds:
	movq	16(%rdi),%xmm2			// key bytes 16..23
	movl	$11,%esi			// value later stored at 240(%rdx)
	cmpl	$268435456,%r10d		// 0x10000000: bit 28 set, bit 11 clear
	je	L$12rounds_alt

	movups	%xmm0,(%rdx)
	// Each .byte line below is aeskeygenassist $imm,%xmm2,%xmm1.
.byte	102,15,58,223,202,1
	call	L$key_expansion_192a_cold
.byte	102,15,58,223,202,2
	call	L$key_expansion_192b
.byte	102,15,58,223,202,4
	call	L$key_expansion_192a
.byte	102,15,58,223,202,8
	call	L$key_expansion_192b
.byte	102,15,58,223,202,16
	call	L$key_expansion_192a
.byte	102,15,58,223,202,32
	call	L$key_expansion_192b
.byte	102,15,58,223,202,64
	call	L$key_expansion_192a
.byte	102,15,58,223,202,128
	call	L$key_expansion_192b
	movups	%xmm0,(%rax)			// last round key (rax = rdx + 192 here)
	movl	%esi,48(%rax)			// lands at 240(%rdx)
	xorq	%rax,%rax
	jmp	L$enc_key_ret

.p2align	4
L$12rounds_alt:
	movdqa	L$key_rotate192(%rip),%xmm5	// pshufb mask
	movdqa	L$key_rcon1(%rip),%xmm4		// round constant vector, starts at 1
	movl	$8,%r10d			// loop count; 24 schedule bytes per pass
	movdqu	%xmm0,(%rdx)
	jmp	L$oop_key192

.p2align	4
L$oop_key192:
	movq	%xmm2,0(%rax)			// store low 8 bytes of xmm2
	movdqa	%xmm2,%xmm1
.byte	102,15,56,0,213				// pshufb %xmm5,%xmm2
.byte	102,15,56,221,212			// aesenclast %xmm4,%xmm2
	pslld	$1,%xmm4			// double the round constant
	leaq	24(%rax),%rax

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0

	pshufd	$255,%xmm0,%xmm3		// broadcast dword 3 of xmm0
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3

	pxor	%xmm2,%xmm0
	pxor	%xmm3,%xmm2
	movdqu	%xmm0,-16(%rax)

	decl	%r10d
	jnz	L$oop_key192

	movl	%esi,32(%rax)			// rax = rdx + 208 here, so this is 240(%rdx)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

// ---- 256-bit key ----------------------------------------------------------
.p2align	4
L$14rounds:
	movups	16(%rdi),%xmm2			// key bytes 16..31
	movl	$13,%esi			// value later stored at 240(%rdx)
	leaq	16(%rax),%rax			// cursor skips the two slots filled from the user key
	cmpl	$268435456,%r10d		// 0x10000000: bit 28 set, bit 11 clear
	je	L$14rounds_alt

	movups	%xmm0,(%rdx)
	movups	%xmm2,16(%rdx)
	// .byte lines ending 202,imm are aeskeygenassist $imm,%xmm2,%xmm1;
	// those ending 200,imm are aeskeygenassist $imm,%xmm0,%xmm1.
.byte	102,15,58,223,202,1
	call	L$key_expansion_256a_cold
.byte	102,15,58,223,200,1
	call	L$key_expansion_256b
.byte	102,15,58,223,202,2
	call	L$key_expansion_256a
.byte	102,15,58,223,200,2
	call	L$key_expansion_256b
.byte	102,15,58,223,202,4
	call	L$key_expansion_256a
.byte	102,15,58,223,200,4
	call	L$key_expansion_256b
.byte	102,15,58,223,202,8
	call	L$key_expansion_256a
.byte	102,15,58,223,200,8
	call	L$key_expansion_256b
.byte	102,15,58,223,202,16
	call	L$key_expansion_256a
.byte	102,15,58,223,200,16
	call	L$key_expansion_256b
.byte	102,15,58,223,202,32
	call	L$key_expansion_256a
.byte	102,15,58,223,200,32
	call	L$key_expansion_256b
.byte	102,15,58,223,202,64
	call	L$key_expansion_256a
	movups	%xmm0,(%rax)			// last round key (rax = rdx + 224 here)
	movl	%esi,16(%rax)			// lands at 240(%rdx)
	xorq	%rax,%rax
	jmp	L$enc_key_ret

.p2align	4
L$14rounds_alt:
	movdqa	L$key_rotate(%rip),%xmm5	// pshufb mask
	movdqa	L$key_rcon1(%rip),%xmm4		// round constant vector, starts at 1
	movl	$7,%r10d			// loop count; the 7th pass leaves at the jz below
	movdqu	%xmm0,0(%rdx)
	movdqa	%xmm2,%xmm1
	movdqu	%xmm2,16(%rdx)
	jmp	L$oop_key256

.p2align	4
L$oop_key256:
.byte	102,15,56,0,213				// pshufb %xmm5,%xmm2
.byte	102,15,56,221,212			// aesenclast %xmm4,%xmm2

	movdqa	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm0,%xmm3
	pslldq	$4,%xmm0
	pxor	%xmm3,%xmm0
	pslld	$1,%xmm4			// double the round constant

	pxor	%xmm2,%xmm0
	movdqu	%xmm0,(%rax)

	decl	%r10d
	jz	L$done_key256

	pshufd	$255,%xmm0,%xmm2		// broadcast dword 3 of xmm0
	pxor	%xmm3,%xmm3			// zero operand for the aesenclast below
.byte	102,15,56,221,211			// aesenclast %xmm3,%xmm2

	movdqa	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm1,%xmm3
	pslldq	$4,%xmm1
	pxor	%xmm3,%xmm1

	pxor	%xmm1,%xmm2
	movdqu	%xmm2,16(%rax)
	leaq	32(%rax),%rax
	movdqa	%xmm2,%xmm1

	jmp	L$oop_key256

L$done_key256:
	movl	%esi,16(%rax)			// rax = rdx + 224 here, so this is 240(%rdx)
	xorl	%eax,%eax
	jmp	L$enc_key_ret

.p2align	4
L$bad_keybits:
	movq	$-2,%rax			// unsupported key size
// Common exit: zero xmm0..xmm5, release the 8 bytes reserved on entry.
L$enc_key_ret:
	pxor	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	addq	$8,%rsp
	.byte	0xf3,0xc3			// rep ret
L$SEH_end_set_encrypt_key:

// ---------------------------------------------------------------------------
// Local key-expansion helpers, reached only via call/jmp from the code above.
// Shared register contract as used by the callers:
//   %rax  = write cursor into the key schedule
//   %xmm0 = current round key (128) / low key half (192, 256)
//   %xmm2 = remaining key material (192, 256)
//   %xmm1 = result of the aeskeygenassist issued just before the call
//   %xmm4 = scratch carried between calls (zeroed once at function entry)
// The plain entry stores the current key and advances %rax; the *_cold entry
// skips that store.
// ---------------------------------------------------------------------------
.p2align	4
L$key_expansion_128:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_128_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1		// broadcast dword 3 of the keygenassist result
	xorps	%xmm1,%xmm0			// xmm0 = next round key
	.byte	0xf3,0xc3			// rep ret

.p2align	4
L$key_expansion_192a:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_192a_cold:
	movaps	%xmm2,%xmm5			// keep xmm2 for the following 192b call
L$key_expansion_192b_warm:
	shufps	$16,%xmm0,%xmm4
	movdqa	%xmm2,%xmm3
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	pslldq	$4,%xmm3
	xorps	%xmm4,%xmm0
	pshufd	$85,%xmm1,%xmm1			// broadcast dword 1 of the keygenassist result
	pxor	%xmm3,%xmm2
	pxor	%xmm1,%xmm0
	pshufd	$255,%xmm0,%xmm3		// broadcast dword 3 of the updated xmm0
	pxor	%xmm3,%xmm2
	.byte	0xf3,0xc3			// rep ret

.p2align	4
// Stores 32 schedule bytes assembled from xmm5, xmm0 and xmm2, then joins
// the shared update code above.
L$key_expansion_192b:
	movaps	%xmm0,%xmm3
	shufps	$68,%xmm0,%xmm5
	movups	%xmm5,(%rax)
	shufps	$78,%xmm2,%xmm3
	movups	%xmm3,16(%rax)
	leaq	32(%rax),%rax
	jmp	L$key_expansion_192b_warm

.p2align	4
L$key_expansion_256a:
	movups	%xmm2,(%rax)
	leaq	16(%rax),%rax
L$key_expansion_256a_cold:
	shufps	$16,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$140,%xmm0,%xmm4
	xorps	%xmm4,%xmm0
	shufps	$255,%xmm1,%xmm1		// broadcast dword 3 of the keygenassist result
	xorps	%xmm1,%xmm0
	.byte	0xf3,0xc3			// rep ret

.p2align	4
L$key_expansion_256b:
	movups	%xmm0,(%rax)
	leaq	16(%rax),%rax

	shufps	$16,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$140,%xmm2,%xmm4
	xorps	%xmm4,%xmm2
	shufps	$170,%xmm1,%xmm1		// broadcast dword 2 of the keygenassist result
	xorps	%xmm1,%xmm2
	.byte	0xf3,0xc3			// rep ret


// ---------------------------------------------------------------------------
// Constant pool, 64-byte aligned. Only L$key_rotate, L$key_rotate192,
// L$key_rcon1 and L$key_rcon1b are referenced by the key-setup code above;
// the other tables are not referenced in this section.
// ---------------------------------------------------------------------------
.p2align	6
L$bswap_mask:
.byte	15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
L$increment32:
.long	6,6,6,0
L$increment64:
.long	1,0,0,0
L$xts_magic:
.long	0x87,0,1,0
L$increment1:
.byte	0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1
L$key_rotate:
.long	0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d
L$key_rotate192:
.long	0x04070605,0x04070605,0x04070605,0x04070605
L$key_rcon1:
.long	1,1,1,1
L$key_rcon1b:
.long	0x1b,0x1b,0x1b,0x1b

// NUL-terminated ASCII: "AES for Intel AES-NI, CRYPTOGAMS by <appro@openssl.org>"
.byte	65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
.p2align	6
#endif