/*
 * x86-64 AES-NI block primitives, GNU as (AT&T operand order), guarded by
 * `#if defined(__x86_64__)`; the matching #endif is not in this part of the
 * file.
 *
 * NOTE(review): the decimal number glued in front of every statement counts
 * up by one per statement and looks like residue of a numbered listing, with
 * many statements sharing one physical line. It is left untouched here;
 * confirm against a pristine copy before handing this to the assembler.
 *
 * Hand-encoded opcodes used throughout:
 *   .byte 102,15,56,220,M   aesenc      key,state
 *   .byte 102,15,56,221,M   aesenclast  key,state
 *   .byte 102,15,56,222,M   aesdec      key,state
 *   .byte 102,15,56,223,M   aesdeclast  key,state
 *       M is the ModRM byte 0xC0 | state<<3 | key:
 *       208/209 -> %xmm2, 216/217 -> %xmm3, 224/225 -> %xmm4,
 *       232/233 -> %xmm5, 240/241 -> %xmm6, 248/249 -> %xmm7;
 *       even M takes the round key from %xmm0, odd M from %xmm1.
 *       With the extra prefix byte 68 (REX.R): 192/193 -> %xmm8,
 *       200/201 -> %xmm9.
 *   .byte 0xf3,0xc3         rep ret (every function returns with this)
 *   .byte 0x0f,0x1f,0x00    3-byte nop
 *
 * aesni_encrypt / aesni_decrypt (global, hidden): one 16-byte block.
 *   In:  %rdi -> input block, %rsi -> output block,
 *        %rdx -> key schedule; the 32-bit value at 240(%rdx) is the loop count.
 *   The block is XORed with the first 16 bytes of the schedule, then the
 *   loop runs `count` aesenc (aesdec) steps with consecutive 16-byte round
 *   keys, followed by one aesenclast (aesdeclast).
 *   %xmm0, %xmm1 and %xmm2 are zeroed before returning; %eax, %rdx and the
 *   flags are overwritten.
 *
 * _aesni_encryptN / _aesni_decryptN, N = 2,3,4,6,8 (file-local, no .globl):
 *   In:  %rcx -> key schedule, %eax = round count, N blocks in %xmm2 upward
 *        (%xmm2..%xmm3 for N=2 ... %xmm2..%xmm9 for N=8).
 *   Out: the same N registers, transformed in place.
 *   Overwrites %rax, %rcx, %xmm0, %xmm1 and the flags; %rax is 0 on return
 *   because that is the loop exit condition.
 *   `shll $4,%eax` scales the count to bytes, %rcx is advanced to
 *   schedule+32+16*count and %rax is negated, so (%rcx,%rax) walks the
 *   remaining round keys upward. Each loop pass consumes two round keys,
 *   alternating %xmm1 and %xmm0, and adds 32 to %rax; `jnz` tests the flags
 *   of that addq (the movups and AES instructions after it leave flags
 *   alone). The exit test is therefore only met when the count is odd and
 *   at least 3. After the loop one more aesenc/aesdec with %xmm1 and the
 *   final aesenclast/aesdeclast with %xmm0 are issued.
 *
 * The line that follows holds, in order: the cpp guard and section/extern
 * directives, aesni_encrypt, aesni_decrypt, _aesni_encrypt2, and the start
 * of _aesni_decrypt2 (it breaks off inside a leaq).
 */
1#if defined(__x86_64__) 2.text 3.extern OPENSSL_ia32cap_P 4.hidden OPENSSL_ia32cap_P 5.globl aesni_encrypt 6.hidden aesni_encrypt 7.type aesni_encrypt,@function 8.align 16 9aesni_encrypt: 10 movups (%rdi),%xmm2 11 movl 240(%rdx),%eax 12 movups (%rdx),%xmm0 13 movups 16(%rdx),%xmm1 14 leaq 32(%rdx),%rdx 15 xorps %xmm0,%xmm2 16.Loop_enc1_1: 17.byte 102,15,56,220,209 18 decl %eax 19 movups (%rdx),%xmm1 20 leaq 16(%rdx),%rdx 21 jnz .Loop_enc1_1 22.byte 102,15,56,221,209 23 pxor %xmm0,%xmm0 24 pxor %xmm1,%xmm1 25 movups %xmm2,(%rsi) 26 pxor %xmm2,%xmm2 27 .byte 0xf3,0xc3 28.size aesni_encrypt,.-aesni_encrypt 29 30.globl aesni_decrypt 31.hidden aesni_decrypt 32.type aesni_decrypt,@function 33.align 16 34aesni_decrypt: 35 movups (%rdi),%xmm2 36 movl 240(%rdx),%eax 37 movups (%rdx),%xmm0 38 movups 16(%rdx),%xmm1 39 leaq 32(%rdx),%rdx 40 xorps %xmm0,%xmm2 41.Loop_dec1_2: 42.byte 102,15,56,222,209 43 decl %eax 44 movups (%rdx),%xmm1 45 leaq 16(%rdx),%rdx 46 jnz .Loop_dec1_2 47.byte 102,15,56,223,209 48 pxor %xmm0,%xmm0 49 pxor %xmm1,%xmm1 50 movups %xmm2,(%rsi) 51 pxor %xmm2,%xmm2 52 .byte 0xf3,0xc3 53.size aesni_decrypt, .-aesni_decrypt 54.type _aesni_encrypt2,@function 55.align 16 56_aesni_encrypt2: 57 movups (%rcx),%xmm0 58 shll $4,%eax 59 movups 16(%rcx),%xmm1 60 xorps %xmm0,%xmm2 61 xorps %xmm0,%xmm3 62 movups 32(%rcx),%xmm0 63 leaq 32(%rcx,%rax,1),%rcx 64 negq %rax 65 addq $16,%rax 66 67.Lenc_loop2: 68.byte 102,15,56,220,209 69.byte 102,15,56,220,217 70 movups (%rcx,%rax,1),%xmm1 71 addq $32,%rax 72.byte 102,15,56,220,208 73.byte 102,15,56,220,216 74 movups -16(%rcx,%rax,1),%xmm0 75 jnz .Lenc_loop2 76 77.byte 102,15,56,220,209 78.byte 102,15,56,220,217 79.byte 102,15,56,221,208 80.byte 102,15,56,221,216 81 .byte 0xf3,0xc3 82.size _aesni_encrypt2,.-_aesni_encrypt2 83.type _aesni_decrypt2,@function 84.align 16 85_aesni_decrypt2: 86 movups (%rcx),%xmm0 87 shll $4,%eax 88 movups 16(%rcx),%xmm1 89 xorps %xmm0,%xmm2 90 xorps %xmm0,%xmm3 91 movups 32(%rcx),%xmm0 92 leaq 
/*
 * The line that follows completes _aesni_decrypt2 (aesdec/aesdeclast twin of
 * _aesni_encrypt2), then holds _aesni_encrypt3 and _aesni_decrypt3, which
 * apply the same scheme to %xmm2..%xmm4. The return and .size of
 * _aesni_decrypt3 sit at the start of the line after it.
 */
32(%rcx,%rax,1),%rcx 93 negq %rax 94 addq $16,%rax 95 96.Ldec_loop2: 97.byte 102,15,56,222,209 98.byte 102,15,56,222,217 99 movups (%rcx,%rax,1),%xmm1 100 addq $32,%rax 101.byte 102,15,56,222,208 102.byte 102,15,56,222,216 103 movups -16(%rcx,%rax,1),%xmm0 104 jnz .Ldec_loop2 105 106.byte 102,15,56,222,209 107.byte 102,15,56,222,217 108.byte 102,15,56,223,208 109.byte 102,15,56,223,216 110 .byte 0xf3,0xc3 111.size _aesni_decrypt2,.-_aesni_decrypt2 112.type _aesni_encrypt3,@function 113.align 16 114_aesni_encrypt3: 115 movups (%rcx),%xmm0 116 shll $4,%eax 117 movups 16(%rcx),%xmm1 118 xorps %xmm0,%xmm2 119 xorps %xmm0,%xmm3 120 xorps %xmm0,%xmm4 121 movups 32(%rcx),%xmm0 122 leaq 32(%rcx,%rax,1),%rcx 123 negq %rax 124 addq $16,%rax 125 126.Lenc_loop3: 127.byte 102,15,56,220,209 128.byte 102,15,56,220,217 129.byte 102,15,56,220,225 130 movups (%rcx,%rax,1),%xmm1 131 addq $32,%rax 132.byte 102,15,56,220,208 133.byte 102,15,56,220,216 134.byte 102,15,56,220,224 135 movups -16(%rcx,%rax,1),%xmm0 136 jnz .Lenc_loop3 137 138.byte 102,15,56,220,209 139.byte 102,15,56,220,217 140.byte 102,15,56,220,225 141.byte 102,15,56,221,208 142.byte 102,15,56,221,216 143.byte 102,15,56,221,224 144 .byte 0xf3,0xc3 145.size _aesni_encrypt3,.-_aesni_encrypt3 146.type _aesni_decrypt3,@function 147.align 16 148_aesni_decrypt3: 149 movups (%rcx),%xmm0 150 shll $4,%eax 151 movups 16(%rcx),%xmm1 152 xorps %xmm0,%xmm2 153 xorps %xmm0,%xmm3 154 xorps %xmm0,%xmm4 155 movups 32(%rcx),%xmm0 156 leaq 32(%rcx,%rax,1),%rcx 157 negq %rax 158 addq $16,%rax 159 160.Ldec_loop3: 161.byte 102,15,56,222,209 162.byte 102,15,56,222,217 163.byte 102,15,56,222,225 164 movups (%rcx,%rax,1),%xmm1 165 addq $32,%rax 166.byte 102,15,56,222,208 167.byte 102,15,56,222,216 168.byte 102,15,56,222,224 169 movups -16(%rcx,%rax,1),%xmm0 170 jnz .Ldec_loop3 171 172.byte 102,15,56,222,209 173.byte 102,15,56,222,217 174.byte 102,15,56,222,225 175.byte 102,15,56,223,208 176.byte 102,15,56,223,216 177.byte 102,15,56,223,224 178 
/*
 * The line that follows closes _aesni_decrypt3, then holds _aesni_encrypt4
 * and _aesni_decrypt4 (%xmm2..%xmm5). These two differ from the 2/3-block
 * versions only by the 3-byte nop placed between `negq %rax` and
 * `addq $16,%rax`. The line ends on the `.type` keyword that opens
 * _aesni_encrypt6.
 */
.byte 0xf3,0xc3 179.size _aesni_decrypt3,.-_aesni_decrypt3 180.type _aesni_encrypt4,@function 181.align 16 182_aesni_encrypt4: 183 movups (%rcx),%xmm0 184 shll $4,%eax 185 movups 16(%rcx),%xmm1 186 xorps %xmm0,%xmm2 187 xorps %xmm0,%xmm3 188 xorps %xmm0,%xmm4 189 xorps %xmm0,%xmm5 190 movups 32(%rcx),%xmm0 191 leaq 32(%rcx,%rax,1),%rcx 192 negq %rax 193.byte 0x0f,0x1f,0x00 194 addq $16,%rax 195 196.Lenc_loop4: 197.byte 102,15,56,220,209 198.byte 102,15,56,220,217 199.byte 102,15,56,220,225 200.byte 102,15,56,220,233 201 movups (%rcx,%rax,1),%xmm1 202 addq $32,%rax 203.byte 102,15,56,220,208 204.byte 102,15,56,220,216 205.byte 102,15,56,220,224 206.byte 102,15,56,220,232 207 movups -16(%rcx,%rax,1),%xmm0 208 jnz .Lenc_loop4 209 210.byte 102,15,56,220,209 211.byte 102,15,56,220,217 212.byte 102,15,56,220,225 213.byte 102,15,56,220,233 214.byte 102,15,56,221,208 215.byte 102,15,56,221,216 216.byte 102,15,56,221,224 217.byte 102,15,56,221,232 218 .byte 0xf3,0xc3 219.size _aesni_encrypt4,.-_aesni_encrypt4 220.type _aesni_decrypt4,@function 221.align 16 222_aesni_decrypt4: 223 movups (%rcx),%xmm0 224 shll $4,%eax 225 movups 16(%rcx),%xmm1 226 xorps %xmm0,%xmm2 227 xorps %xmm0,%xmm3 228 xorps %xmm0,%xmm4 229 xorps %xmm0,%xmm5 230 movups 32(%rcx),%xmm0 231 leaq 32(%rcx,%rax,1),%rcx 232 negq %rax 233.byte 0x0f,0x1f,0x00 234 addq $16,%rax 235 236.Ldec_loop4: 237.byte 102,15,56,222,209 238.byte 102,15,56,222,217 239.byte 102,15,56,222,225 240.byte 102,15,56,222,233 241 movups (%rcx,%rax,1),%xmm1 242 addq $32,%rax 243.byte 102,15,56,222,208 244.byte 102,15,56,222,216 245.byte 102,15,56,222,224 246.byte 102,15,56,222,232 247 movups -16(%rcx,%rax,1),%xmm0 248 jnz .Ldec_loop4 249 250.byte 102,15,56,222,209 251.byte 102,15,56,222,217 252.byte 102,15,56,222,225 253.byte 102,15,56,222,233 254.byte 102,15,56,223,208 255.byte 102,15,56,223,216 256.byte 102,15,56,223,224 257.byte 102,15,56,223,232 258 .byte 0xf3,0xc3 259.size _aesni_decrypt4,.-_aesni_decrypt4 260.type 
/*
 * The line that follows holds _aesni_encrypt6 and most of _aesni_decrypt6
 * (%xmm2..%xmm7). Unlike the smaller helpers, the first round with %xmm1 is
 * started for %xmm2..%xmm4 while the remaining blocks are still being XORed
 * with the first round key, and the third round key is fetched through
 * (%rcx,%rax) after %rcx/%rax have been set up. Control then jumps into the
 * middle of the loop (.Lenc_loop6_enter / .Ldec_loop6_enter) to finish that
 * first round for %xmm5..%xmm7.
 * .Lenc_loop6 is also used as an entry point from outside this function:
 * aesni_ctr32_encrypt_blocks, later in the file, issues `call .Lenc_loop6`.
 * Do not rename or move that label.
 */
_aesni_encrypt6,@function 261.align 16 262_aesni_encrypt6: 263 movups (%rcx),%xmm0 264 shll $4,%eax 265 movups 16(%rcx),%xmm1 266 xorps %xmm0,%xmm2 267 pxor %xmm0,%xmm3 268 pxor %xmm0,%xmm4 269.byte 102,15,56,220,209 270 leaq 32(%rcx,%rax,1),%rcx 271 negq %rax 272.byte 102,15,56,220,217 273 pxor %xmm0,%xmm5 274 pxor %xmm0,%xmm6 275.byte 102,15,56,220,225 276 pxor %xmm0,%xmm7 277 movups (%rcx,%rax,1),%xmm0 278 addq $16,%rax 279 jmp .Lenc_loop6_enter 280.align 16 281.Lenc_loop6: 282.byte 102,15,56,220,209 283.byte 102,15,56,220,217 284.byte 102,15,56,220,225 285.Lenc_loop6_enter: 286.byte 102,15,56,220,233 287.byte 102,15,56,220,241 288.byte 102,15,56,220,249 289 movups (%rcx,%rax,1),%xmm1 290 addq $32,%rax 291.byte 102,15,56,220,208 292.byte 102,15,56,220,216 293.byte 102,15,56,220,224 294.byte 102,15,56,220,232 295.byte 102,15,56,220,240 296.byte 102,15,56,220,248 297 movups -16(%rcx,%rax,1),%xmm0 298 jnz .Lenc_loop6 299 300.byte 102,15,56,220,209 301.byte 102,15,56,220,217 302.byte 102,15,56,220,225 303.byte 102,15,56,220,233 304.byte 102,15,56,220,241 305.byte 102,15,56,220,249 306.byte 102,15,56,221,208 307.byte 102,15,56,221,216 308.byte 102,15,56,221,224 309.byte 102,15,56,221,232 310.byte 102,15,56,221,240 311.byte 102,15,56,221,248 312 .byte 0xf3,0xc3 313.size _aesni_encrypt6,.-_aesni_encrypt6 314.type _aesni_decrypt6,@function 315.align 16 316_aesni_decrypt6: 317 movups (%rcx),%xmm0 318 shll $4,%eax 319 movups 16(%rcx),%xmm1 320 xorps %xmm0,%xmm2 321 pxor %xmm0,%xmm3 322 pxor %xmm0,%xmm4 323.byte 102,15,56,222,209 324 leaq 32(%rcx,%rax,1),%rcx 325 negq %rax 326.byte 102,15,56,222,217 327 pxor %xmm0,%xmm5 328 pxor %xmm0,%xmm6 329.byte 102,15,56,222,225 330 pxor %xmm0,%xmm7 331 movups (%rcx,%rax,1),%xmm0 332 addq $16,%rax 333 jmp .Ldec_loop6_enter 334.align 16 335.Ldec_loop6: 336.byte 102,15,56,222,209 337.byte 102,15,56,222,217 338.byte 102,15,56,222,225 339.Ldec_loop6_enter: 340.byte 102,15,56,222,233 341.byte 102,15,56,222,241 342.byte 102,15,56,222,249 
/*
 * The line that follows completes _aesni_decrypt6 and holds most of
 * _aesni_encrypt8 (%xmm2..%xmm9). The 8-block helper starts the first round
 * for %xmm2 and %xmm3 during the initial XORs and enters its loop at
 * .Lenc_loop8_inner. A second mid-loop label, .Lenc_loop8_enter, is not
 * referenced inside the helper; aesni_ctr32_encrypt_blocks, later in the
 * file, issues `call .Lenc_loop8_enter`.
 */
343 movups (%rcx,%rax,1),%xmm1 344 addq $32,%rax 345.byte 102,15,56,222,208 346.byte 102,15,56,222,216 347.byte 102,15,56,222,224 348.byte 102,15,56,222,232 349.byte 102,15,56,222,240 350.byte 102,15,56,222,248 351 movups -16(%rcx,%rax,1),%xmm0 352 jnz .Ldec_loop6 353 354.byte 102,15,56,222,209 355.byte 102,15,56,222,217 356.byte 102,15,56,222,225 357.byte 102,15,56,222,233 358.byte 102,15,56,222,241 359.byte 102,15,56,222,249 360.byte 102,15,56,223,208 361.byte 102,15,56,223,216 362.byte 102,15,56,223,224 363.byte 102,15,56,223,232 364.byte 102,15,56,223,240 365.byte 102,15,56,223,248 366 .byte 0xf3,0xc3 367.size _aesni_decrypt6,.-_aesni_decrypt6 368.type _aesni_encrypt8,@function 369.align 16 370_aesni_encrypt8: 371 movups (%rcx),%xmm0 372 shll $4,%eax 373 movups 16(%rcx),%xmm1 374 xorps %xmm0,%xmm2 375 xorps %xmm0,%xmm3 376 pxor %xmm0,%xmm4 377 pxor %xmm0,%xmm5 378 pxor %xmm0,%xmm6 379 leaq 32(%rcx,%rax,1),%rcx 380 negq %rax 381.byte 102,15,56,220,209 382 pxor %xmm0,%xmm7 383 pxor %xmm0,%xmm8 384.byte 102,15,56,220,217 385 pxor %xmm0,%xmm9 386 movups (%rcx,%rax,1),%xmm0 387 addq $16,%rax 388 jmp .Lenc_loop8_inner 389.align 16 390.Lenc_loop8: 391.byte 102,15,56,220,209 392.byte 102,15,56,220,217 393.Lenc_loop8_inner: 394.byte 102,15,56,220,225 395.byte 102,15,56,220,233 396.byte 102,15,56,220,241 397.byte 102,15,56,220,249 398.byte 102,68,15,56,220,193 399.byte 102,68,15,56,220,201 400.Lenc_loop8_enter: 401 movups (%rcx,%rax,1),%xmm1 402 addq $32,%rax 403.byte 102,15,56,220,208 404.byte 102,15,56,220,216 405.byte 102,15,56,220,224 406.byte 102,15,56,220,232 407.byte 102,15,56,220,240 408.byte 102,15,56,220,248 409.byte 102,68,15,56,220,192 410.byte 102,68,15,56,220,200 411 movups -16(%rcx,%rax,1),%xmm0 412 jnz .Lenc_loop8 413 414.byte 102,15,56,220,209 415.byte 102,15,56,220,217 416.byte 102,15,56,220,225 417.byte 102,15,56,220,233 418.byte 102,15,56,220,241 419.byte 102,15,56,220,249 420.byte 102,68,15,56,220,193 421.byte 102,68,15,56,220,201 422.byte 
/*
 * The line that follows completes _aesni_encrypt8 (eight aesenclast with
 * %xmm0, then return), holds _aesni_decrypt8 (aesdec/aesdeclast twin with
 * the same label layout: .Ldec_loop8, .Ldec_loop8_inner, .Ldec_loop8_enter),
 * and ends with the .globl/.hidden/.type/.align directives of
 * aesni_ecb_encrypt, whose label starts the next line.
 */
102,15,56,221,208 423.byte 102,15,56,221,216 424.byte 102,15,56,221,224 425.byte 102,15,56,221,232 426.byte 102,15,56,221,240 427.byte 102,15,56,221,248 428.byte 102,68,15,56,221,192 429.byte 102,68,15,56,221,200 430 .byte 0xf3,0xc3 431.size _aesni_encrypt8,.-_aesni_encrypt8 432.type _aesni_decrypt8,@function 433.align 16 434_aesni_decrypt8: 435 movups (%rcx),%xmm0 436 shll $4,%eax 437 movups 16(%rcx),%xmm1 438 xorps %xmm0,%xmm2 439 xorps %xmm0,%xmm3 440 pxor %xmm0,%xmm4 441 pxor %xmm0,%xmm5 442 pxor %xmm0,%xmm6 443 leaq 32(%rcx,%rax,1),%rcx 444 negq %rax 445.byte 102,15,56,222,209 446 pxor %xmm0,%xmm7 447 pxor %xmm0,%xmm8 448.byte 102,15,56,222,217 449 pxor %xmm0,%xmm9 450 movups (%rcx,%rax,1),%xmm0 451 addq $16,%rax 452 jmp .Ldec_loop8_inner 453.align 16 454.Ldec_loop8: 455.byte 102,15,56,222,209 456.byte 102,15,56,222,217 457.Ldec_loop8_inner: 458.byte 102,15,56,222,225 459.byte 102,15,56,222,233 460.byte 102,15,56,222,241 461.byte 102,15,56,222,249 462.byte 102,68,15,56,222,193 463.byte 102,68,15,56,222,201 464.Ldec_loop8_enter: 465 movups (%rcx,%rax,1),%xmm1 466 addq $32,%rax 467.byte 102,15,56,222,208 468.byte 102,15,56,222,216 469.byte 102,15,56,222,224 470.byte 102,15,56,222,232 471.byte 102,15,56,222,240 472.byte 102,15,56,222,248 473.byte 102,68,15,56,222,192 474.byte 102,68,15,56,222,200 475 movups -16(%rcx,%rax,1),%xmm0 476 jnz .Ldec_loop8 477 478.byte 102,15,56,222,209 479.byte 102,15,56,222,217 480.byte 102,15,56,222,225 481.byte 102,15,56,222,233 482.byte 102,15,56,222,241 483.byte 102,15,56,222,249 484.byte 102,68,15,56,222,193 485.byte 102,68,15,56,222,201 486.byte 102,15,56,223,208 487.byte 102,15,56,223,216 488.byte 102,15,56,223,224 489.byte 102,15,56,223,232 490.byte 102,15,56,223,240 491.byte 102,15,56,223,248 492.byte 102,68,15,56,223,192 493.byte 102,68,15,56,223,200 494 .byte 0xf3,0xc3 495.size _aesni_decrypt8,.-_aesni_decrypt8 496.globl aesni_ecb_encrypt 497.hidden aesni_ecb_encrypt 498.type aesni_ecb_encrypt,@function 499.align 16 
/*
 * aesni_ecb_encrypt (global, hidden): label and body start on the line that
 * follows.
 *   In:  %rdi -> source, %rsi -> destination, %rdx = length in bytes,
 *        %rcx -> key schedule (32-bit round count read from offset 240),
 *        %r8d = direction: non-zero takes the encrypt path, zero jumps to
 *        .Lecb_decrypt.
 *   `andq $-16,%rdx` drops any partial trailing block; if nothing is left
 *   the routine returns at once through .Lecb_ret.
 *   %r11 and %r10d keep copies of the key pointer and round count so that
 *   %rcx and %eax can be reloaded after every helper call (the
 *   _aesni_encryptN/_aesni_decryptN helpers overwrite both).
 *
 * Byte sequences such as `.byte 102,15,56,220,209` are hand-encoded
 * aesenc (220), aesenclast (221), aesdec (222) and aesdeclast (223)
 * instructions; `.byte 102,15,56,0,M` is pshufb; `.byte 0xf3,0xc3` is
 * rep ret.
 *
 * Bulk path: 8 blocks (0x80 bytes) per _aesni_encrypt8 call. Inside
 * .Lecb_enc_loop8 the stores of the previous eight results are interleaved
 * with the loads of the next eight inputs. %rdx is reduced by 0x80 before
 * the first call, the loop repeats while `subq $0x80,%rdx` does not borrow,
 * and 0x80 is added back afterwards to recover the residual byte count.
 *
 * Tail path (.Lecb_enc_tail, 1..7 blocks): each `cmpq` feeds two branches,
 * the `jb` right after it and a `je` placed after an intervening movups,
 * which does not change the flags. The line ends inside the 7-block case,
 * which zeroes the unused %xmm9 and reuses _aesni_encrypt8.
 */
500aesni_ecb_encrypt: 501 andq $-16,%rdx 502 jz .Lecb_ret 503 504 movl 240(%rcx),%eax 505 movups (%rcx),%xmm0 506 movq %rcx,%r11 507 movl %eax,%r10d 508 testl %r8d,%r8d 509 jz .Lecb_decrypt 510 511 cmpq $0x80,%rdx 512 jb .Lecb_enc_tail 513 514 movdqu (%rdi),%xmm2 515 movdqu 16(%rdi),%xmm3 516 movdqu 32(%rdi),%xmm4 517 movdqu 48(%rdi),%xmm5 518 movdqu 64(%rdi),%xmm6 519 movdqu 80(%rdi),%xmm7 520 movdqu 96(%rdi),%xmm8 521 movdqu 112(%rdi),%xmm9 522 leaq 128(%rdi),%rdi 523 subq $0x80,%rdx 524 jmp .Lecb_enc_loop8_enter 525.align 16 526.Lecb_enc_loop8: 527 movups %xmm2,(%rsi) 528 movq %r11,%rcx 529 movdqu (%rdi),%xmm2 530 movl %r10d,%eax 531 movups %xmm3,16(%rsi) 532 movdqu 16(%rdi),%xmm3 533 movups %xmm4,32(%rsi) 534 movdqu 32(%rdi),%xmm4 535 movups %xmm5,48(%rsi) 536 movdqu 48(%rdi),%xmm5 537 movups %xmm6,64(%rsi) 538 movdqu 64(%rdi),%xmm6 539 movups %xmm7,80(%rsi) 540 movdqu 80(%rdi),%xmm7 541 movups %xmm8,96(%rsi) 542 movdqu 96(%rdi),%xmm8 543 movups %xmm9,112(%rsi) 544 leaq 128(%rsi),%rsi 545 movdqu 112(%rdi),%xmm9 546 leaq 128(%rdi),%rdi 547.Lecb_enc_loop8_enter: 548 549 call _aesni_encrypt8 550 551 subq $0x80,%rdx 552 jnc .Lecb_enc_loop8 553 554 movups %xmm2,(%rsi) 555 movq %r11,%rcx 556 movups %xmm3,16(%rsi) 557 movl %r10d,%eax 558 movups %xmm4,32(%rsi) 559 movups %xmm5,48(%rsi) 560 movups %xmm6,64(%rsi) 561 movups %xmm7,80(%rsi) 562 movups %xmm8,96(%rsi) 563 movups %xmm9,112(%rsi) 564 leaq 128(%rsi),%rsi 565 addq $0x80,%rdx 566 jz .Lecb_ret 567 568.Lecb_enc_tail: 569 movups (%rdi),%xmm2 570 cmpq $0x20,%rdx 571 jb .Lecb_enc_one 572 movups 16(%rdi),%xmm3 573 je .Lecb_enc_two 574 movups 32(%rdi),%xmm4 575 cmpq $0x40,%rdx 576 jb .Lecb_enc_three 577 movups 48(%rdi),%xmm5 578 je .Lecb_enc_four 579 movups 64(%rdi),%xmm6 580 cmpq $0x60,%rdx 581 jb .Lecb_enc_five 582 movups 80(%rdi),%xmm7 583 je .Lecb_enc_six 584 movdqu 96(%rdi),%xmm8 585 xorps %xmm9,%xmm9 586 call _aesni_encrypt8 587 movups %xmm2,(%rsi) 588 movups %xmm3,16(%rsi) 589 movups %xmm4,32(%rsi) 590 movups 
/*
 * The line that follows finishes the 7-block encrypt tail and holds the
 * remaining encrypt tails:
 *   .Lecb_enc_one    inline single-block loop (same shape as aesni_encrypt)
 *   .Lecb_enc_two / _three / _four   call the helper of matching width
 *   .Lecb_enc_five   zeroes %xmm7, calls _aesni_encrypt6, stores 5 results
 *   .Lecb_enc_six    calls _aesni_encrypt6, stores 6 results
 * It then starts .Lecb_decrypt, whose bulk loop mirrors the encrypt one but
 * calls _aesni_decrypt8.
 */
%xmm5,48(%rsi) 591 movups %xmm6,64(%rsi) 592 movups %xmm7,80(%rsi) 593 movups %xmm8,96(%rsi) 594 jmp .Lecb_ret 595.align 16 596.Lecb_enc_one: 597 movups (%rcx),%xmm0 598 movups 16(%rcx),%xmm1 599 leaq 32(%rcx),%rcx 600 xorps %xmm0,%xmm2 601.Loop_enc1_3: 602.byte 102,15,56,220,209 603 decl %eax 604 movups (%rcx),%xmm1 605 leaq 16(%rcx),%rcx 606 jnz .Loop_enc1_3 607.byte 102,15,56,221,209 608 movups %xmm2,(%rsi) 609 jmp .Lecb_ret 610.align 16 611.Lecb_enc_two: 612 call _aesni_encrypt2 613 movups %xmm2,(%rsi) 614 movups %xmm3,16(%rsi) 615 jmp .Lecb_ret 616.align 16 617.Lecb_enc_three: 618 call _aesni_encrypt3 619 movups %xmm2,(%rsi) 620 movups %xmm3,16(%rsi) 621 movups %xmm4,32(%rsi) 622 jmp .Lecb_ret 623.align 16 624.Lecb_enc_four: 625 call _aesni_encrypt4 626 movups %xmm2,(%rsi) 627 movups %xmm3,16(%rsi) 628 movups %xmm4,32(%rsi) 629 movups %xmm5,48(%rsi) 630 jmp .Lecb_ret 631.align 16 632.Lecb_enc_five: 633 xorps %xmm7,%xmm7 634 call _aesni_encrypt6 635 movups %xmm2,(%rsi) 636 movups %xmm3,16(%rsi) 637 movups %xmm4,32(%rsi) 638 movups %xmm5,48(%rsi) 639 movups %xmm6,64(%rsi) 640 jmp .Lecb_ret 641.align 16 642.Lecb_enc_six: 643 call _aesni_encrypt6 644 movups %xmm2,(%rsi) 645 movups %xmm3,16(%rsi) 646 movups %xmm4,32(%rsi) 647 movups %xmm5,48(%rsi) 648 movups %xmm6,64(%rsi) 649 movups %xmm7,80(%rsi) 650 jmp .Lecb_ret 651 652.align 16 653.Lecb_decrypt: 654 cmpq $0x80,%rdx 655 jb .Lecb_dec_tail 656 657 movdqu (%rdi),%xmm2 658 movdqu 16(%rdi),%xmm3 659 movdqu 32(%rdi),%xmm4 660 movdqu 48(%rdi),%xmm5 661 movdqu 64(%rdi),%xmm6 662 movdqu 80(%rdi),%xmm7 663 movdqu 96(%rdi),%xmm8 664 movdqu 112(%rdi),%xmm9 665 leaq 128(%rdi),%rdi 666 subq $0x80,%rdx 667 jmp .Lecb_dec_loop8_enter 668.align 16 669.Lecb_dec_loop8: 670 movups %xmm2,(%rsi) 671 movq %r11,%rcx 672 movdqu (%rdi),%xmm2 673 movl %r10d,%eax 674 movups %xmm3,16(%rsi) 675 movdqu 16(%rdi),%xmm3 676 movups %xmm4,32(%rsi) 677 movdqu 32(%rdi),%xmm4 678 movups %xmm5,48(%rsi) 679 movdqu 48(%rdi),%xmm5 680 movups 
/*
 * The line that follows holds the rest of the decrypt bulk loop and the
 * decrypt tail dispatch. Differences from the encrypt side that are visible
 * in the code:
 *   - %xmm0 is reloaded from (%r11) after every _aesni_decrypt8 call, and
 *     from (%rcx) before the 7-block tail call.
 *   - once the last bulk results are stored, every data register
 *     (%xmm2..%xmm9) is cleared with pxor; the tail cases do the same.
 *     The encrypt path does not clear its data registers.
 * The line ends inside .Lecb_dec_one, an inline single-block aesdec loop.
 */
%xmm6,64(%rsi) 681 movdqu 64(%rdi),%xmm6 682 movups %xmm7,80(%rsi) 683 movdqu 80(%rdi),%xmm7 684 movups %xmm8,96(%rsi) 685 movdqu 96(%rdi),%xmm8 686 movups %xmm9,112(%rsi) 687 leaq 128(%rsi),%rsi 688 movdqu 112(%rdi),%xmm9 689 leaq 128(%rdi),%rdi 690.Lecb_dec_loop8_enter: 691 692 call _aesni_decrypt8 693 694 movups (%r11),%xmm0 695 subq $0x80,%rdx 696 jnc .Lecb_dec_loop8 697 698 movups %xmm2,(%rsi) 699 pxor %xmm2,%xmm2 700 movq %r11,%rcx 701 movups %xmm3,16(%rsi) 702 pxor %xmm3,%xmm3 703 movl %r10d,%eax 704 movups %xmm4,32(%rsi) 705 pxor %xmm4,%xmm4 706 movups %xmm5,48(%rsi) 707 pxor %xmm5,%xmm5 708 movups %xmm6,64(%rsi) 709 pxor %xmm6,%xmm6 710 movups %xmm7,80(%rsi) 711 pxor %xmm7,%xmm7 712 movups %xmm8,96(%rsi) 713 pxor %xmm8,%xmm8 714 movups %xmm9,112(%rsi) 715 pxor %xmm9,%xmm9 716 leaq 128(%rsi),%rsi 717 addq $0x80,%rdx 718 jz .Lecb_ret 719 720.Lecb_dec_tail: 721 movups (%rdi),%xmm2 722 cmpq $0x20,%rdx 723 jb .Lecb_dec_one 724 movups 16(%rdi),%xmm3 725 je .Lecb_dec_two 726 movups 32(%rdi),%xmm4 727 cmpq $0x40,%rdx 728 jb .Lecb_dec_three 729 movups 48(%rdi),%xmm5 730 je .Lecb_dec_four 731 movups 64(%rdi),%xmm6 732 cmpq $0x60,%rdx 733 jb .Lecb_dec_five 734 movups 80(%rdi),%xmm7 735 je .Lecb_dec_six 736 movups 96(%rdi),%xmm8 737 movups (%rcx),%xmm0 738 xorps %xmm9,%xmm9 739 call _aesni_decrypt8 740 movups %xmm2,(%rsi) 741 pxor %xmm2,%xmm2 742 movups %xmm3,16(%rsi) 743 pxor %xmm3,%xmm3 744 movups %xmm4,32(%rsi) 745 pxor %xmm4,%xmm4 746 movups %xmm5,48(%rsi) 747 pxor %xmm5,%xmm5 748 movups %xmm6,64(%rsi) 749 pxor %xmm6,%xmm6 750 movups %xmm7,80(%rsi) 751 pxor %xmm7,%xmm7 752 movups %xmm8,96(%rsi) 753 pxor %xmm8,%xmm8 754 pxor %xmm9,%xmm9 755 jmp .Lecb_ret 756.align 16 757.Lecb_dec_one: 758 movups (%rcx),%xmm0 759 movups 16(%rcx),%xmm1 760 leaq 32(%rcx),%rcx 761 xorps %xmm0,%xmm2 762.Loop_dec1_4: 763.byte 102,15,56,222,209 764 decl %eax 765 movups (%rcx),%xmm1 766 leaq 16(%rcx),%rcx 767 jnz .Loop_dec1_4 768.byte 102,15,56,223,209 769 movups %xmm2,(%rsi) 770 pxor 
/*
 * The line that follows holds the decrypt tails for 2..6 blocks (each
 * result register is cleared right after it is stored; the 5-block case
 * also clears the %xmm7 it zero-filled for _aesni_decrypt6) and .Lecb_ret,
 * the common exit, which clears %xmm0 and %xmm1 and returns.
 * .Lecb_dec_six has no jump at its end and falls through into .Lecb_ret.
 *
 * It then starts aesni_ccm64_encrypt_blocks (global, hidden):
 *   In:  %rdi -> source, %rsi -> destination,
 *        %rdx = number of 16-byte blocks,
 *        %rcx -> key schedule (round count at offset 240),
 *        %r8  -> 16-byte counter block (only read, never stored back),
 *        %r9  -> 16-byte accumulator (read at entry, rewritten at exit).
 *   The block count is tested only after a block has been processed
 *   (`decq %rdx` ... `jnz` at the loop end), so a count of 0 is not handled
 *   here and must be excluded by the caller.
 *   %xmm6 keeps the counter block shuffled by .Lbswap_mask and is advanced
 *   with `paddq .Lincrement64`; both constants are defined outside this
 *   part of the file. Shuffling %xmm6 again (pshufb into %xmm2) yields the
 *   counter block for the next iteration; the first iteration uses the
 *   block exactly as loaded from (%r8).
 *   %r11 keeps the key pointer, %rcx is advanced to schedule+32+16*count
 *   and %r10 = 16 - 16*count is the start offset reloaded into %rax for
 *   every block.
 */
%xmm2,%xmm2 771 jmp .Lecb_ret 772.align 16 773.Lecb_dec_two: 774 call _aesni_decrypt2 775 movups %xmm2,(%rsi) 776 pxor %xmm2,%xmm2 777 movups %xmm3,16(%rsi) 778 pxor %xmm3,%xmm3 779 jmp .Lecb_ret 780.align 16 781.Lecb_dec_three: 782 call _aesni_decrypt3 783 movups %xmm2,(%rsi) 784 pxor %xmm2,%xmm2 785 movups %xmm3,16(%rsi) 786 pxor %xmm3,%xmm3 787 movups %xmm4,32(%rsi) 788 pxor %xmm4,%xmm4 789 jmp .Lecb_ret 790.align 16 791.Lecb_dec_four: 792 call _aesni_decrypt4 793 movups %xmm2,(%rsi) 794 pxor %xmm2,%xmm2 795 movups %xmm3,16(%rsi) 796 pxor %xmm3,%xmm3 797 movups %xmm4,32(%rsi) 798 pxor %xmm4,%xmm4 799 movups %xmm5,48(%rsi) 800 pxor %xmm5,%xmm5 801 jmp .Lecb_ret 802.align 16 803.Lecb_dec_five: 804 xorps %xmm7,%xmm7 805 call _aesni_decrypt6 806 movups %xmm2,(%rsi) 807 pxor %xmm2,%xmm2 808 movups %xmm3,16(%rsi) 809 pxor %xmm3,%xmm3 810 movups %xmm4,32(%rsi) 811 pxor %xmm4,%xmm4 812 movups %xmm5,48(%rsi) 813 pxor %xmm5,%xmm5 814 movups %xmm6,64(%rsi) 815 pxor %xmm6,%xmm6 816 pxor %xmm7,%xmm7 817 jmp .Lecb_ret 818.align 16 819.Lecb_dec_six: 820 call _aesni_decrypt6 821 movups %xmm2,(%rsi) 822 pxor %xmm2,%xmm2 823 movups %xmm3,16(%rsi) 824 pxor %xmm3,%xmm3 825 movups %xmm4,32(%rsi) 826 pxor %xmm4,%xmm4 827 movups %xmm5,48(%rsi) 828 pxor %xmm5,%xmm5 829 movups %xmm6,64(%rsi) 830 pxor %xmm6,%xmm6 831 movups %xmm7,80(%rsi) 832 pxor %xmm7,%xmm7 833 834.Lecb_ret: 835 xorps %xmm0,%xmm0 836 pxor %xmm1,%xmm1 837 .byte 0xf3,0xc3 838.size aesni_ecb_encrypt,.-aesni_ecb_encrypt 839.globl aesni_ccm64_encrypt_blocks 840.hidden aesni_ccm64_encrypt_blocks 841.type aesni_ccm64_encrypt_blocks,@function 842.align 16 843aesni_ccm64_encrypt_blocks: 844 movl 240(%rcx),%eax 845 movdqu (%r8),%xmm6 846 movdqa .Lincrement64(%rip),%xmm9 847 movdqa .Lbswap_mask(%rip),%xmm7 848 849 shll $4,%eax 850 movl $16,%r10d 851 leaq 0(%rcx),%r11 852 movdqu (%r9),%xmm3 853 movdqa %xmm6,%xmm2 854 leaq 32(%rcx,%rax,1),%rcx 855.byte 102,15,56,0,247 856 subq %rax,%r10 857 jmp .Lccm64_enc_outer 858.align 16 
/*
 * The line that follows holds the per-block loop of
 * aesni_ccm64_encrypt_blocks. Each .Lccm64_enc_outer pass runs two AES
 * encryptions in lockstep with the same round keys:
 *   %xmm2 = counter block, %xmm3 = accumulator XOR input block
 *   (the first-round-key XOR for %xmm3 is folded in by XORing the input
 *   into %xmm0 first).
 * Afterwards output = input XOR encrypted counter, stored to (%rsi).
 * `decq %rdx` is issued between the last aesenc and the aesenclast pair;
 * the `jnz` at the bottom still sees its flags because nothing in between
 * writes them. On exit the accumulator is stored to (%r9) and
 * %xmm0..%xmm3, %xmm6 and %xmm8 are cleared.
 *
 * It then starts aesni_ccm64_decrypt_blocks (global, hidden), which takes
 * the same six register arguments. Before its loop, the first counter block
 * is encrypted on its own (.Loop_enc1_5) and the first input block is
 * loaded into %xmm8.
 */
859.Lccm64_enc_outer: 860 movups (%r11),%xmm0 861 movq %r10,%rax 862 movups (%rdi),%xmm8 863 864 xorps %xmm0,%xmm2 865 movups 16(%r11),%xmm1 866 xorps %xmm8,%xmm0 867 xorps %xmm0,%xmm3 868 movups 32(%r11),%xmm0 869 870.Lccm64_enc2_loop: 871.byte 102,15,56,220,209 872.byte 102,15,56,220,217 873 movups (%rcx,%rax,1),%xmm1 874 addq $32,%rax 875.byte 102,15,56,220,208 876.byte 102,15,56,220,216 877 movups -16(%rcx,%rax,1),%xmm0 878 jnz .Lccm64_enc2_loop 879.byte 102,15,56,220,209 880.byte 102,15,56,220,217 881 paddq %xmm9,%xmm6 882 decq %rdx 883.byte 102,15,56,221,208 884.byte 102,15,56,221,216 885 886 leaq 16(%rdi),%rdi 887 xorps %xmm2,%xmm8 888 movdqa %xmm6,%xmm2 889 movups %xmm8,(%rsi) 890.byte 102,15,56,0,215 891 leaq 16(%rsi),%rsi 892 jnz .Lccm64_enc_outer 893 894 pxor %xmm0,%xmm0 895 pxor %xmm1,%xmm1 896 pxor %xmm2,%xmm2 897 movups %xmm3,(%r9) 898 pxor %xmm3,%xmm3 899 pxor %xmm8,%xmm8 900 pxor %xmm6,%xmm6 901 .byte 0xf3,0xc3 902.size aesni_ccm64_encrypt_blocks,.-aesni_ccm64_encrypt_blocks 903.globl aesni_ccm64_decrypt_blocks 904.hidden aesni_ccm64_decrypt_blocks 905.type aesni_ccm64_decrypt_blocks,@function 906.align 16 907aesni_ccm64_decrypt_blocks: 908 movl 240(%rcx),%eax 909 movups (%r8),%xmm6 910 movdqu (%r9),%xmm3 911 movdqa .Lincrement64(%rip),%xmm9 912 movdqa .Lbswap_mask(%rip),%xmm7 913 914 movaps %xmm6,%xmm2 915 movl %eax,%r10d 916 movq %rcx,%r11 917.byte 102,15,56,0,247 918 movups (%rcx),%xmm0 919 movups 16(%rcx),%xmm1 920 leaq 32(%rcx),%rcx 921 xorps %xmm0,%xmm2 922.Loop_enc1_5: 923.byte 102,15,56,220,209 924 decl %eax 925 movups (%rcx),%xmm1 926 leaq 16(%rcx),%rcx 927 jnz .Loop_enc1_5 928.byte 102,15,56,221,209 929 shll $4,%r10d 930 movl $16,%eax 931 movups (%rdi),%xmm8 932 paddq %xmm9,%xmm6 933 leaq 16(%rdi),%rdi 934 subq %r10,%rax 935 leaq 32(%r11,%r10,1),%rcx 936 movq %rax,%r10 937 jmp .Lccm64_dec_outer 938.align 16 939.Lccm64_dec_outer: 940 xorps %xmm2,%xmm8 941 movdqa %xmm6,%xmm2 942 movups %xmm8,(%rsi) 943 leaq 16(%rsi),%rsi 944.byte 
/*
 * The line that follows holds the rest of aesni_ccm64_decrypt_blocks.
 * Each .Lccm64_dec_outer pass: output = input XOR encrypted counter, stored
 * to (%rsi); then `subq $1,%rdx`.
 *   - Count not yet zero: the output block (XORed with the first round key)
 *     is folded into the accumulator %xmm3, and the accumulator and the
 *     next counter block are encrypted in lockstep in .Lccm64_dec2_loop
 *     while the next input block is loaded.
 *   - Count zero (.Lccm64_dec_break): the last output block is folded into
 *     the accumulator, which is then encrypted alone (.Loop_enc1_6; the
 *     round count is re-read from 240(%r11) and %r11 is consumed as the key
 *     cursor), stored to (%r9), and the working registers are cleared.
 * As in the encrypt routine, the count is first tested after one block has
 * been written, so a count of 0 is not handled here.
 *
 * After that routine's .size directive the line opens
 * aesni_ctr32_encrypt_blocks, which continues well beyond this line and is
 * not documented here.
 */
102,15,56,0,215 945 946 subq $1,%rdx 947 jz .Lccm64_dec_break 948 949 movups (%r11),%xmm0 950 movq %r10,%rax 951 movups 16(%r11),%xmm1 952 xorps %xmm0,%xmm8 953 xorps %xmm0,%xmm2 954 xorps %xmm8,%xmm3 955 movups 32(%r11),%xmm0 956 jmp .Lccm64_dec2_loop 957.align 16 958.Lccm64_dec2_loop: 959.byte 102,15,56,220,209 960.byte 102,15,56,220,217 961 movups (%rcx,%rax,1),%xmm1 962 addq $32,%rax 963.byte 102,15,56,220,208 964.byte 102,15,56,220,216 965 movups -16(%rcx,%rax,1),%xmm0 966 jnz .Lccm64_dec2_loop 967 movups (%rdi),%xmm8 968 paddq %xmm9,%xmm6 969.byte 102,15,56,220,209 970.byte 102,15,56,220,217 971.byte 102,15,56,221,208 972.byte 102,15,56,221,216 973 leaq 16(%rdi),%rdi 974 jmp .Lccm64_dec_outer 975 976.align 16 977.Lccm64_dec_break: 978 979 movl 240(%r11),%eax 980 movups (%r11),%xmm0 981 movups 16(%r11),%xmm1 982 xorps %xmm0,%xmm8 983 leaq 32(%r11),%r11 984 xorps %xmm8,%xmm3 985.Loop_enc1_6: 986.byte 102,15,56,220,217 987 decl %eax 988 movups (%r11),%xmm1 989 leaq 16(%r11),%r11 990 jnz .Loop_enc1_6 991.byte 102,15,56,221,217 992 pxor %xmm0,%xmm0 993 pxor %xmm1,%xmm1 994 pxor %xmm2,%xmm2 995 movups %xmm3,(%r9) 996 pxor %xmm3,%xmm3 997 pxor %xmm8,%xmm8 998 pxor %xmm6,%xmm6 999 .byte 0xf3,0xc3 1000.size aesni_ccm64_decrypt_blocks,.-aesni_ccm64_decrypt_blocks 1001.globl aesni_ctr32_encrypt_blocks 1002.hidden aesni_ctr32_encrypt_blocks 1003.type aesni_ctr32_encrypt_blocks,@function 1004.align 16 1005aesni_ctr32_encrypt_blocks: 1006 cmpq $1,%rdx 1007 jne .Lctr32_bulk 1008 1009 1010 1011 movups (%r8),%xmm2 1012 movups (%rdi),%xmm3 1013 movl 240(%rcx),%edx 1014 movups (%rcx),%xmm0 1015 movups 16(%rcx),%xmm1 1016 leaq 32(%rcx),%rcx 1017 xorps %xmm0,%xmm2 1018.Loop_enc1_7: 1019.byte 102,15,56,220,209 1020 decl %edx 1021 movups (%rcx),%xmm1 1022 leaq 16(%rcx),%rcx 1023 jnz .Loop_enc1_7 1024.byte 102,15,56,221,209 1025 pxor %xmm0,%xmm0 1026 pxor %xmm1,%xmm1 1027 xorps %xmm3,%xmm2 1028 pxor %xmm3,%xmm3 1029 movups %xmm2,(%rsi) 1030 xorps %xmm2,%xmm2 1031 jmp 
.Lctr32_epilogue 1032 1033.align 16 1034.Lctr32_bulk: 1035 leaq (%rsp),%r11 1036 pushq %rbp 1037 subq $128,%rsp 1038 andq $-16,%rsp 1039 1040 1041 1042 1043 movdqu (%r8),%xmm2 1044 movdqu (%rcx),%xmm0 1045 movl 12(%r8),%r8d 1046 pxor %xmm0,%xmm2 1047 movl 12(%rcx),%ebp 1048 movdqa %xmm2,0(%rsp) 1049 bswapl %r8d 1050 movdqa %xmm2,%xmm3 1051 movdqa %xmm2,%xmm4 1052 movdqa %xmm2,%xmm5 1053 movdqa %xmm2,64(%rsp) 1054 movdqa %xmm2,80(%rsp) 1055 movdqa %xmm2,96(%rsp) 1056 movq %rdx,%r10 1057 movdqa %xmm2,112(%rsp) 1058 1059 leaq 1(%r8),%rax 1060 leaq 2(%r8),%rdx 1061 bswapl %eax 1062 bswapl %edx 1063 xorl %ebp,%eax 1064 xorl %ebp,%edx 1065.byte 102,15,58,34,216,3 1066 leaq 3(%r8),%rax 1067 movdqa %xmm3,16(%rsp) 1068.byte 102,15,58,34,226,3 1069 bswapl %eax 1070 movq %r10,%rdx 1071 leaq 4(%r8),%r10 1072 movdqa %xmm4,32(%rsp) 1073 xorl %ebp,%eax 1074 bswapl %r10d 1075.byte 102,15,58,34,232,3 1076 xorl %ebp,%r10d 1077 movdqa %xmm5,48(%rsp) 1078 leaq 5(%r8),%r9 1079 movl %r10d,64+12(%rsp) 1080 bswapl %r9d 1081 leaq 6(%r8),%r10 1082 movl 240(%rcx),%eax 1083 xorl %ebp,%r9d 1084 bswapl %r10d 1085 movl %r9d,80+12(%rsp) 1086 xorl %ebp,%r10d 1087 leaq 7(%r8),%r9 1088 movl %r10d,96+12(%rsp) 1089 bswapl %r9d 1090 movl OPENSSL_ia32cap_P+4(%rip),%r10d 1091 xorl %ebp,%r9d 1092 andl $71303168,%r10d 1093 movl %r9d,112+12(%rsp) 1094 1095 movups 16(%rcx),%xmm1 1096 1097 movdqa 64(%rsp),%xmm6 1098 movdqa 80(%rsp),%xmm7 1099 1100 cmpq $8,%rdx 1101 jb .Lctr32_tail 1102 1103 subq $6,%rdx 1104 cmpl $4194304,%r10d 1105 je .Lctr32_6x 1106 1107 leaq 128(%rcx),%rcx 1108 subq $2,%rdx 1109 jmp .Lctr32_loop8 1110 1111.align 16 1112.Lctr32_6x: 1113 shll $4,%eax 1114 movl $48,%r10d 1115 bswapl %ebp 1116 leaq 32(%rcx,%rax,1),%rcx 1117 subq %rax,%r10 1118 jmp .Lctr32_loop6 1119 1120.align 16 1121.Lctr32_loop6: 1122 addl $6,%r8d 1123 movups -48(%rcx,%r10,1),%xmm0 1124.byte 102,15,56,220,209 1125 movl %r8d,%eax 1126 xorl %ebp,%eax 1127.byte 102,15,56,220,217 1128.byte 0x0f,0x38,0xf1,0x44,0x24,12 1129 leal 
1(%r8),%eax 1130.byte 102,15,56,220,225 1131 xorl %ebp,%eax 1132.byte 0x0f,0x38,0xf1,0x44,0x24,28 1133.byte 102,15,56,220,233 1134 leal 2(%r8),%eax 1135 xorl %ebp,%eax 1136.byte 102,15,56,220,241 1137.byte 0x0f,0x38,0xf1,0x44,0x24,44 1138 leal 3(%r8),%eax 1139.byte 102,15,56,220,249 1140 movups -32(%rcx,%r10,1),%xmm1 1141 xorl %ebp,%eax 1142 1143.byte 102,15,56,220,208 1144.byte 0x0f,0x38,0xf1,0x44,0x24,60 1145 leal 4(%r8),%eax 1146.byte 102,15,56,220,216 1147 xorl %ebp,%eax 1148.byte 0x0f,0x38,0xf1,0x44,0x24,76 1149.byte 102,15,56,220,224 1150 leal 5(%r8),%eax 1151 xorl %ebp,%eax 1152.byte 102,15,56,220,232 1153.byte 0x0f,0x38,0xf1,0x44,0x24,92 1154 movq %r10,%rax 1155.byte 102,15,56,220,240 1156.byte 102,15,56,220,248 1157 movups -16(%rcx,%r10,1),%xmm0 1158 1159 call .Lenc_loop6 1160 1161 movdqu (%rdi),%xmm8 1162 movdqu 16(%rdi),%xmm9 1163 movdqu 32(%rdi),%xmm10 1164 movdqu 48(%rdi),%xmm11 1165 movdqu 64(%rdi),%xmm12 1166 movdqu 80(%rdi),%xmm13 1167 leaq 96(%rdi),%rdi 1168 movups -64(%rcx,%r10,1),%xmm1 1169 pxor %xmm2,%xmm8 1170 movaps 0(%rsp),%xmm2 1171 pxor %xmm3,%xmm9 1172 movaps 16(%rsp),%xmm3 1173 pxor %xmm4,%xmm10 1174 movaps 32(%rsp),%xmm4 1175 pxor %xmm5,%xmm11 1176 movaps 48(%rsp),%xmm5 1177 pxor %xmm6,%xmm12 1178 movaps 64(%rsp),%xmm6 1179 pxor %xmm7,%xmm13 1180 movaps 80(%rsp),%xmm7 1181 movdqu %xmm8,(%rsi) 1182 movdqu %xmm9,16(%rsi) 1183 movdqu %xmm10,32(%rsi) 1184 movdqu %xmm11,48(%rsi) 1185 movdqu %xmm12,64(%rsi) 1186 movdqu %xmm13,80(%rsi) 1187 leaq 96(%rsi),%rsi 1188 1189 subq $6,%rdx 1190 jnc .Lctr32_loop6 1191 1192 addq $6,%rdx 1193 jz .Lctr32_done 1194 1195 leal -48(%r10),%eax 1196 leaq -80(%rcx,%r10,1),%rcx 1197 negl %eax 1198 shrl $4,%eax 1199 jmp .Lctr32_tail 1200 1201.align 32 1202.Lctr32_loop8: 1203 addl $8,%r8d 1204 movdqa 96(%rsp),%xmm8 1205.byte 102,15,56,220,209 1206 movl %r8d,%r9d 1207 movdqa 112(%rsp),%xmm9 1208.byte 102,15,56,220,217 1209 bswapl %r9d 1210 movups 32-128(%rcx),%xmm0 1211.byte 102,15,56,220,225 1212 xorl %ebp,%r9d 1213 
nop 1214.byte 102,15,56,220,233 1215 movl %r9d,0+12(%rsp) 1216 leaq 1(%r8),%r9 1217.byte 102,15,56,220,241 1218.byte 102,15,56,220,249 1219.byte 102,68,15,56,220,193 1220.byte 102,68,15,56,220,201 1221 movups 48-128(%rcx),%xmm1 1222 bswapl %r9d 1223.byte 102,15,56,220,208 1224.byte 102,15,56,220,216 1225 xorl %ebp,%r9d 1226.byte 0x66,0x90 1227.byte 102,15,56,220,224 1228.byte 102,15,56,220,232 1229 movl %r9d,16+12(%rsp) 1230 leaq 2(%r8),%r9 1231.byte 102,15,56,220,240 1232.byte 102,15,56,220,248 1233.byte 102,68,15,56,220,192 1234.byte 102,68,15,56,220,200 1235 movups 64-128(%rcx),%xmm0 1236 bswapl %r9d 1237.byte 102,15,56,220,209 1238.byte 102,15,56,220,217 1239 xorl %ebp,%r9d 1240.byte 0x66,0x90 1241.byte 102,15,56,220,225 1242.byte 102,15,56,220,233 1243 movl %r9d,32+12(%rsp) 1244 leaq 3(%r8),%r9 1245.byte 102,15,56,220,241 1246.byte 102,15,56,220,249 1247.byte 102,68,15,56,220,193 1248.byte 102,68,15,56,220,201 1249 movups 80-128(%rcx),%xmm1 1250 bswapl %r9d 1251.byte 102,15,56,220,208 1252.byte 102,15,56,220,216 1253 xorl %ebp,%r9d 1254.byte 0x66,0x90 1255.byte 102,15,56,220,224 1256.byte 102,15,56,220,232 1257 movl %r9d,48+12(%rsp) 1258 leaq 4(%r8),%r9 1259.byte 102,15,56,220,240 1260.byte 102,15,56,220,248 1261.byte 102,68,15,56,220,192 1262.byte 102,68,15,56,220,200 1263 movups 96-128(%rcx),%xmm0 1264 bswapl %r9d 1265.byte 102,15,56,220,209 1266.byte 102,15,56,220,217 1267 xorl %ebp,%r9d 1268.byte 0x66,0x90 1269.byte 102,15,56,220,225 1270.byte 102,15,56,220,233 1271 movl %r9d,64+12(%rsp) 1272 leaq 5(%r8),%r9 1273.byte 102,15,56,220,241 1274.byte 102,15,56,220,249 1275.byte 102,68,15,56,220,193 1276.byte 102,68,15,56,220,201 1277 movups 112-128(%rcx),%xmm1 1278 bswapl %r9d 1279.byte 102,15,56,220,208 1280.byte 102,15,56,220,216 1281 xorl %ebp,%r9d 1282.byte 0x66,0x90 1283.byte 102,15,56,220,224 1284.byte 102,15,56,220,232 1285 movl %r9d,80+12(%rsp) 1286 leaq 6(%r8),%r9 1287.byte 102,15,56,220,240 1288.byte 102,15,56,220,248 1289.byte 102,68,15,56,220,192 
1290.byte 102,68,15,56,220,200 1291 movups 128-128(%rcx),%xmm0 1292 bswapl %r9d 1293.byte 102,15,56,220,209 1294.byte 102,15,56,220,217 1295 xorl %ebp,%r9d 1296.byte 0x66,0x90 1297.byte 102,15,56,220,225 1298.byte 102,15,56,220,233 1299 movl %r9d,96+12(%rsp) 1300 leaq 7(%r8),%r9 1301.byte 102,15,56,220,241 1302.byte 102,15,56,220,249 1303.byte 102,68,15,56,220,193 1304.byte 102,68,15,56,220,201 1305 movups 144-128(%rcx),%xmm1 1306 bswapl %r9d 1307.byte 102,15,56,220,208 1308.byte 102,15,56,220,216 1309.byte 102,15,56,220,224 1310 xorl %ebp,%r9d 1311 movdqu 0(%rdi),%xmm10 1312.byte 102,15,56,220,232 1313 movl %r9d,112+12(%rsp) 1314 cmpl $11,%eax 1315.byte 102,15,56,220,240 1316.byte 102,15,56,220,248 1317.byte 102,68,15,56,220,192 1318.byte 102,68,15,56,220,200 1319 movups 160-128(%rcx),%xmm0 1320 1321 jb .Lctr32_enc_done 1322 1323.byte 102,15,56,220,209 1324.byte 102,15,56,220,217 1325.byte 102,15,56,220,225 1326.byte 102,15,56,220,233 1327.byte 102,15,56,220,241 1328.byte 102,15,56,220,249 1329.byte 102,68,15,56,220,193 1330.byte 102,68,15,56,220,201 1331 movups 176-128(%rcx),%xmm1 1332 1333.byte 102,15,56,220,208 1334.byte 102,15,56,220,216 1335.byte 102,15,56,220,224 1336.byte 102,15,56,220,232 1337.byte 102,15,56,220,240 1338.byte 102,15,56,220,248 1339.byte 102,68,15,56,220,192 1340.byte 102,68,15,56,220,200 1341 movups 192-128(%rcx),%xmm0 1342 je .Lctr32_enc_done 1343 1344.byte 102,15,56,220,209 1345.byte 102,15,56,220,217 1346.byte 102,15,56,220,225 1347.byte 102,15,56,220,233 1348.byte 102,15,56,220,241 1349.byte 102,15,56,220,249 1350.byte 102,68,15,56,220,193 1351.byte 102,68,15,56,220,201 1352 movups 208-128(%rcx),%xmm1 1353 1354.byte 102,15,56,220,208 1355.byte 102,15,56,220,216 1356.byte 102,15,56,220,224 1357.byte 102,15,56,220,232 1358.byte 102,15,56,220,240 1359.byte 102,15,56,220,248 1360.byte 102,68,15,56,220,192 1361.byte 102,68,15,56,220,200 1362 movups 224-128(%rcx),%xmm0 1363 jmp .Lctr32_enc_done 1364 1365.align 16 1366.Lctr32_enc_done: 1367 
movdqu 16(%rdi),%xmm11 1368 pxor %xmm0,%xmm10 1369 movdqu 32(%rdi),%xmm12 1370 pxor %xmm0,%xmm11 1371 movdqu 48(%rdi),%xmm13 1372 pxor %xmm0,%xmm12 1373 movdqu 64(%rdi),%xmm14 1374 pxor %xmm0,%xmm13 1375 movdqu 80(%rdi),%xmm15 1376 pxor %xmm0,%xmm14 1377 pxor %xmm0,%xmm15 1378.byte 102,15,56,220,209 1379.byte 102,15,56,220,217 1380.byte 102,15,56,220,225 1381.byte 102,15,56,220,233 1382.byte 102,15,56,220,241 1383.byte 102,15,56,220,249 1384.byte 102,68,15,56,220,193 1385.byte 102,68,15,56,220,201 1386 movdqu 96(%rdi),%xmm1 1387 leaq 128(%rdi),%rdi 1388 1389.byte 102,65,15,56,221,210 1390 pxor %xmm0,%xmm1 1391 movdqu 112-128(%rdi),%xmm10 1392.byte 102,65,15,56,221,219 1393 pxor %xmm0,%xmm10 1394 movdqa 0(%rsp),%xmm11 1395.byte 102,65,15,56,221,228 1396.byte 102,65,15,56,221,237 1397 movdqa 16(%rsp),%xmm12 1398 movdqa 32(%rsp),%xmm13 1399.byte 102,65,15,56,221,246 1400.byte 102,65,15,56,221,255 1401 movdqa 48(%rsp),%xmm14 1402 movdqa 64(%rsp),%xmm15 1403.byte 102,68,15,56,221,193 1404 movdqa 80(%rsp),%xmm0 1405 movups 16-128(%rcx),%xmm1 1406.byte 102,69,15,56,221,202 1407 1408 movups %xmm2,(%rsi) 1409 movdqa %xmm11,%xmm2 1410 movups %xmm3,16(%rsi) 1411 movdqa %xmm12,%xmm3 1412 movups %xmm4,32(%rsi) 1413 movdqa %xmm13,%xmm4 1414 movups %xmm5,48(%rsi) 1415 movdqa %xmm14,%xmm5 1416 movups %xmm6,64(%rsi) 1417 movdqa %xmm15,%xmm6 1418 movups %xmm7,80(%rsi) 1419 movdqa %xmm0,%xmm7 1420 movups %xmm8,96(%rsi) 1421 movups %xmm9,112(%rsi) 1422 leaq 128(%rsi),%rsi 1423 1424 subq $8,%rdx 1425 jnc .Lctr32_loop8 1426 1427 addq $8,%rdx 1428 jz .Lctr32_done 1429 leaq -128(%rcx),%rcx 1430 1431.Lctr32_tail: 1432 1433 1434 leaq 16(%rcx),%rcx 1435 cmpq $4,%rdx 1436 jb .Lctr32_loop3 1437 je .Lctr32_loop4 1438 1439 1440 shll $4,%eax 1441 movdqa 96(%rsp),%xmm8 1442 pxor %xmm9,%xmm9 1443 1444 movups 16(%rcx),%xmm0 1445.byte 102,15,56,220,209 1446.byte 102,15,56,220,217 1447 leaq 32-16(%rcx,%rax,1),%rcx 1448 negq %rax 1449.byte 102,15,56,220,225 1450 addq $16,%rax 1451 movups (%rdi),%xmm10 
1452.byte 102,15,56,220,233 1453.byte 102,15,56,220,241 1454 movups 16(%rdi),%xmm11 1455 movups 32(%rdi),%xmm12 1456.byte 102,15,56,220,249 1457.byte 102,68,15,56,220,193 1458 1459 call .Lenc_loop8_enter 1460 1461 movdqu 48(%rdi),%xmm13 1462 pxor %xmm10,%xmm2 1463 movdqu 64(%rdi),%xmm10 1464 pxor %xmm11,%xmm3 1465 movdqu %xmm2,(%rsi) 1466 pxor %xmm12,%xmm4 1467 movdqu %xmm3,16(%rsi) 1468 pxor %xmm13,%xmm5 1469 movdqu %xmm4,32(%rsi) 1470 pxor %xmm10,%xmm6 1471 movdqu %xmm5,48(%rsi) 1472 movdqu %xmm6,64(%rsi) 1473 cmpq $6,%rdx 1474 jb .Lctr32_done 1475 1476 movups 80(%rdi),%xmm11 1477 xorps %xmm11,%xmm7 1478 movups %xmm7,80(%rsi) 1479 je .Lctr32_done 1480 1481 movups 96(%rdi),%xmm12 1482 xorps %xmm12,%xmm8 1483 movups %xmm8,96(%rsi) 1484 jmp .Lctr32_done 1485 1486.align 32 1487.Lctr32_loop4: 1488.byte 102,15,56,220,209 1489 leaq 16(%rcx),%rcx 1490 decl %eax 1491.byte 102,15,56,220,217 1492.byte 102,15,56,220,225 1493.byte 102,15,56,220,233 1494 movups (%rcx),%xmm1 1495 jnz .Lctr32_loop4 1496.byte 102,15,56,221,209 1497.byte 102,15,56,221,217 1498 movups (%rdi),%xmm10 1499 movups 16(%rdi),%xmm11 1500.byte 102,15,56,221,225 1501.byte 102,15,56,221,233 1502 movups 32(%rdi),%xmm12 1503 movups 48(%rdi),%xmm13 1504 1505 xorps %xmm10,%xmm2 1506 movups %xmm2,(%rsi) 1507 xorps %xmm11,%xmm3 1508 movups %xmm3,16(%rsi) 1509 pxor %xmm12,%xmm4 1510 movdqu %xmm4,32(%rsi) 1511 pxor %xmm13,%xmm5 1512 movdqu %xmm5,48(%rsi) 1513 jmp .Lctr32_done 1514 1515.align 32 1516.Lctr32_loop3: 1517.byte 102,15,56,220,209 1518 leaq 16(%rcx),%rcx 1519 decl %eax 1520.byte 102,15,56,220,217 1521.byte 102,15,56,220,225 1522 movups (%rcx),%xmm1 1523 jnz .Lctr32_loop3 1524.byte 102,15,56,221,209 1525.byte 102,15,56,221,217 1526.byte 102,15,56,221,225 1527 1528 movups (%rdi),%xmm10 1529 xorps %xmm10,%xmm2 1530 movups %xmm2,(%rsi) 1531 cmpq $2,%rdx 1532 jb .Lctr32_done 1533 1534 movups 16(%rdi),%xmm11 1535 xorps %xmm11,%xmm3 1536 movups %xmm3,16(%rsi) 1537 je .Lctr32_done 1538 1539 movups 32(%rdi),%xmm12 
/*
 * Layout restored to one statement per line.  The instruction stream is
 * unchanged.
 *
 * Legend for the hand-encoded AES-NI instructions used throughout:
 *   .byte 102,15,56,220,modrm   aesenc      (66 0F 38 DC /r)
 *   .byte 102,15,56,221,modrm   aesenclast  (66 0F 38 DD /r)
 *   .byte 102,15,56,222,modrm   aesdec      (66 0F 38 DE /r)
 *   .byte 102,15,56,223,modrm   aesdeclast  (66 0F 38 DF /r)
 *   an extra 65/68/69 after the 102 is a REX prefix selecting xmm8-xmm15
 *   .byte 0xf3,0xc3             rep ret
 */

/*
 * Remainder of aesni_ctr32_encrypt_blocks (the function starts earlier in
 * the file): XOR the block held in %xmm12 into %xmm4 and store it as the
 * third output block, then fall into the common exit.
 */
	xorps	%xmm12,%xmm4
	movups	%xmm4,32(%rsi)

/*
 * Common exit: clear %xmm0-%xmm15 and the 128 bytes at 0..127(%rsp),
 * then reload %rbp from -8(%r11) and restore %rsp from %r11.
 */
.Lctr32_done:
	xorps	%xmm0,%xmm0
	xorl	%ebp,%ebp
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	movaps	%xmm0,112(%rsp)
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp
	leaq	(%r11),%rsp
.Lctr32_epilogue:
	.byte	0xf3,0xc3
.size	aesni_ctr32_encrypt_blocks,.-aesni_ctr32_encrypt_blocks

/*
 * aesni_xts_encrypt
 *
 * Registers as consumed by the code below:
 *   %rdi  input pointer (advanced as blocks are read)
 *   %rsi  output pointer (advanced as blocks are written)
 *   %rdx  length in bytes; a copy is kept in %r9 and the low four bits
 *         of that copy select the trailing partial-block path
 *   %rcx  key schedule applied to the data blocks (kept in %rbp)
 *   %r8   key schedule used once, to encrypt the initial tweak
 *   %r9   pointer to the 16-byte initial tweak
 * Each key schedule is read as 16-byte round keys with a loop count at
 * byte offset 240.
 *
 * Frame: %r11 keeps the entry %rsp, %rbp is pushed, and a 16-byte aligned
 * scratch area of at least 112 bytes is reserved.  Slots 0..80(%rsp)
 * hold per-block values for the final round; 96(%rsp) holds the first
 * round key XOR the last round key.
 *
 * Tweaks live in %xmm10-%xmm14 with the running value in %xmm15.  Each
 * step doubles %xmm15 with paddq and XORs in a mask built from sign bits
 * and .Lxts_magic (presumably the XTS multiply-by-x in GF(2^128); the
 * constant is defined outside this section).
 */
.globl	aesni_xts_encrypt
.hidden	aesni_xts_encrypt
.type	aesni_xts_encrypt,@function
.align	16
aesni_xts_encrypt:
	leaq	(%rsp),%r11
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	movups	(%r9),%xmm2
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
/* Encrypt the initial tweak in %xmm2 with the schedule at %r8. */
.Loop_enc1_8:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_8
.byte	102,15,56,221,209
	movups	(%rcx),%xmm0
	movq	%rcx,%rbp
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

/*
 * Derive five tweaks into %xmm10-%xmm14, each pre-XORed with the first
 * round key in %xmm0, and leave the next tweak in %xmm15.
 */
	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$0x5f,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)

	subq	$96,%rdx
	jc	.Lxts_enc_short

/* %rax = 112 - 16*rounds: a negative index that counts up to zero. */
	movl	$16+96,%eax
	leaq	32(%rbp,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%rbp),%xmm1
	movq	%rax,%r10
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_enc_grandloop

/* Main loop: six blocks (96 bytes) per iteration. */
.align	32
.Lxts_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,220,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,220,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,220,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,220,233
	movups	32(%rbp),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,220,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,220,249
	movups	48(%rbp),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,220,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,220,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	64(%rbp),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$0x5f,%xmm15,%xmm9
	jmp	.Lxts_enc_loop6
/* Middle rounds: two round keys per pass until %rax reaches zero. */
.align	32
.Lxts_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_enc_loop6

/*
 * Last rounds, interleaved with building the tweaks for the next
 * iteration.  Statement order is kept exactly as in the original.
 */
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	pand	%xmm8,%xmm14
	movups	(%rbp),%xmm10
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,220,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,220,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,220,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,220,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,220,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,220,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,220,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,220,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,220,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	pxor	%xmm0,%xmm15
	movups	(%rbp),%xmm0
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%rbp),%xmm1

/*
 * Final round: aesenclast with memory operands 0,16,...,80(%rsp) into
 * %xmm2-%xmm7 (ModRM+SIB+disp8 form, hence the seven-byte encodings).
 */
	pxor	%xmm15,%xmm14
.byte	102,15,56,221,84,36,0
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,221,92,36,16
.byte	102,15,56,221,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,221,108,36,48
.byte	102,15,56,221,116,36,64
.byte	102,15,56,221,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_enc_grandloop

/* Turn the negative index back into a loop count and reset the key pointer. */
	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%rbp,%rcx
	shrl	$4,%eax

/* Fewer than six whole blocks remain: dispatch on the byte count. */
.Lxts_enc_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	addq	$96,%rdx
	jz	.Lxts_enc_done

	pxor	%xmm0,%xmm11
	cmpq	$0x20,%rdx
	jb	.Lxts_enc_one
	pxor	%xmm0,%xmm12
	je	.Lxts_enc_two

	pxor	%xmm0,%xmm13
	cmpq	$0x40,%rdx
	jb	.Lxts_enc_three
	pxor	%xmm0,%xmm14
	je	.Lxts_enc_four

/* Five blocks: run the six-block helper with %xmm7 cleared. */
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm7

	call	_aesni_encrypt6

	xorps	%xmm10,%xmm2
	movdqa	%xmm15,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	jmp	.Lxts_enc_done

/* One block.  %xmm10 is left holding the next tweak for the steal path. */
.align	16
.Lxts_enc_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_9:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_9
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	jmp	.Lxts_enc_done

/* Two blocks. */
.align	16
.Lxts_enc_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_encrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_enc_done

/* Three blocks. */
.align	16
.Lxts_enc_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_encrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_enc_done

/* Four blocks. */
.align	16
.Lxts_enc_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_encrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_enc_done

/* Whole blocks done; a non-zero (length & 15) selects the steal path. */
.align	16
.Lxts_enc_done:
	andq	$15,%r9
	jz	.Lxts_enc_ret
	movq	%r9,%rdx

/*
 * For each trailing byte: move the corresponding byte of the previous
 * output block to the new tail position and put the input byte in its
 * place.
 */
.Lxts_enc_steal:
	movzbl	(%rdi),%eax
	movzbl	-16(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,-16(%rsi)
	movb	%cl,0(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_enc_steal

/* Re-encrypt the patched block at -16(%rsi) with the tweak in %xmm10. */
	subq	%r9,%rsi
	movq	%rbp,%rcx
	movl	%r10d,%eax

	movups	-16(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_enc1_10:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_enc1_10
.byte	102,15,56,221,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,-16(%rsi)

/* Exit: clear %xmm0-%xmm15 and 0..111(%rsp), then restore %rbp and %rsp. */
.Lxts_enc_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp
	leaq	(%r11),%rsp
.Lxts_enc_epilogue:
	.byte	0xf3,0xc3
.size	aesni_xts_encrypt,.-aesni_xts_encrypt

/*
 * aesni_xts_decrypt
 *
 * Same register roles and frame layout as aesni_xts_encrypt, using
 * aesdec/aesdeclast for the data blocks.  The initial tweak is still
 * processed with aesenc/aesenclast under the schedule at %r8.
 *
 * When the byte length in %rdx is not a multiple of 16, one extra block
 * (16 bytes) is held back from the bulk path.  The tail code then
 * decrypts that block with the following tweak (%xmm11), swaps bytes
 * with the trailing partial block, and decrypts the patched block with
 * the earlier tweak (%xmm10).
 */
.globl	aesni_xts_decrypt
.hidden	aesni_xts_decrypt
.type	aesni_xts_decrypt,@function
.align	16
aesni_xts_decrypt:
	leaq	(%rsp),%r11
	pushq	%rbp
	subq	$112,%rsp
	andq	$-16,%rsp
	movups	(%r9),%xmm2
	movl	240(%r8),%eax
	movl	240(%rcx),%r10d
	movups	(%r8),%xmm0
	movups	16(%r8),%xmm1
	leaq	32(%r8),%r8
	xorps	%xmm0,%xmm2
/* Encrypt the initial tweak in %xmm2 with the schedule at %r8. */
.Loop_enc1_11:
.byte	102,15,56,220,209
	decl	%eax
	movups	(%r8),%xmm1
	leaq	16(%r8),%r8
	jnz	.Loop_enc1_11
.byte	102,15,56,221,209
/* %rdx -= 16 when (%rdx & 15) != 0. */
	xorl	%eax,%eax
	testq	$15,%rdx
	setnz	%al
	shlq	$4,%rax
	subq	%rax,%rdx

	movups	(%rcx),%xmm0
	movq	%rcx,%rbp
	movl	%r10d,%eax
	shll	$4,%r10d
	movq	%rdx,%r9
	andq	$-16,%rdx

	movups	16(%rcx,%r10,1),%xmm1

/* Five tweaks into %xmm10-%xmm14 (pre-XORed with %xmm0), next in %xmm15. */
	movdqa	.Lxts_magic(%rip),%xmm8
	movdqa	%xmm2,%xmm15
	pshufd	$0x5f,%xmm2,%xmm9
	pxor	%xmm0,%xmm1
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm10
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm10
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm11
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm11
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm12
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm12
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
	movdqa	%xmm15,%xmm13
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
	pxor	%xmm0,%xmm13
	pxor	%xmm14,%xmm15
	movdqa	%xmm15,%xmm14
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm9
	pxor	%xmm0,%xmm14
	pxor	%xmm9,%xmm15
	movaps	%xmm1,96(%rsp)

	subq	$96,%rdx
	jc	.Lxts_dec_short

/* %rax = 112 - 16*rounds: a negative index that counts up to zero. */
	movl	$16+96,%eax
	leaq	32(%rbp,%r10,1),%rcx
	subq	%r10,%rax
	movups	16(%rbp),%xmm1
	movq	%rax,%r10
	leaq	.Lxts_magic(%rip),%r8
	jmp	.Lxts_dec_grandloop

/* Main loop: six blocks (96 bytes) per iteration. */
.align	32
.Lxts_dec_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqa	%xmm0,%xmm8
	movdqu	16(%rdi),%xmm3
	pxor	%xmm10,%xmm2
	movdqu	32(%rdi),%xmm4
	pxor	%xmm11,%xmm3
.byte	102,15,56,222,209
	movdqu	48(%rdi),%xmm5
	pxor	%xmm12,%xmm4
.byte	102,15,56,222,217
	movdqu	64(%rdi),%xmm6
	pxor	%xmm13,%xmm5
.byte	102,15,56,222,225
	movdqu	80(%rdi),%xmm7
	pxor	%xmm15,%xmm8
	movdqa	96(%rsp),%xmm9
	pxor	%xmm14,%xmm6
.byte	102,15,56,222,233
	movups	32(%rbp),%xmm0
	leaq	96(%rdi),%rdi
	pxor	%xmm8,%xmm7

	pxor	%xmm9,%xmm10
.byte	102,15,56,222,241
	pxor	%xmm9,%xmm11
	movdqa	%xmm10,0(%rsp)
.byte	102,15,56,222,249
	movups	48(%rbp),%xmm1
	pxor	%xmm9,%xmm12

.byte	102,15,56,222,208
	pxor	%xmm9,%xmm13
	movdqa	%xmm11,16(%rsp)
.byte	102,15,56,222,216
	pxor	%xmm9,%xmm14
	movdqa	%xmm12,32(%rsp)
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pxor	%xmm9,%xmm8
	movdqa	%xmm14,64(%rsp)
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	64(%rbp),%xmm0
	movdqa	%xmm8,80(%rsp)
	pshufd	$0x5f,%xmm15,%xmm9
	jmp	.Lxts_dec_loop6
/* Middle rounds: two round keys per pass until %rax reaches zero. */
.align	32
.Lxts_dec_loop6:
.byte	102,15,56,222,209
.byte	102,15,56,222,217
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	-64(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,222,208
.byte	102,15,56,222,216
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
.byte	102,15,56,222,248
	movups	-80(%rcx,%rax,1),%xmm0
	jnz	.Lxts_dec_loop6

/* Last rounds, interleaved with building the next six tweaks. */
	movdqa	(%r8),%xmm8
	movdqa	%xmm9,%xmm14
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	paddq	%xmm15,%xmm15
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	pand	%xmm8,%xmm14
	movups	(%rbp),%xmm10
.byte	102,15,56,222,225
.byte	102,15,56,222,233
.byte	102,15,56,222,241
	pxor	%xmm14,%xmm15
	movaps	%xmm10,%xmm11
.byte	102,15,56,222,249
	movups	-64(%rcx),%xmm1

	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,208
	paddd	%xmm9,%xmm9
	pxor	%xmm15,%xmm10
.byte	102,15,56,222,216
	psrad	$31,%xmm14
	paddq	%xmm15,%xmm15
.byte	102,15,56,222,224
.byte	102,15,56,222,232
	pand	%xmm8,%xmm14
	movaps	%xmm11,%xmm12
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,248
	movups	-48(%rcx),%xmm0

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm11
	psrad	$31,%xmm14
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	movdqa	%xmm13,48(%rsp)
	pxor	%xmm14,%xmm15
.byte	102,15,56,222,241
	movaps	%xmm12,%xmm13
	movdqa	%xmm9,%xmm14
.byte	102,15,56,222,249
	movups	-32(%rcx),%xmm1

	paddd	%xmm9,%xmm9
.byte	102,15,56,222,208
	pxor	%xmm15,%xmm12
	psrad	$31,%xmm14
.byte	102,15,56,222,216
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm14
.byte	102,15,56,222,224
.byte	102,15,56,222,232
.byte	102,15,56,222,240
	pxor	%xmm14,%xmm15
	movaps	%xmm13,%xmm14
.byte	102,15,56,222,248

	movdqa	%xmm9,%xmm0
	paddd	%xmm9,%xmm9
.byte	102,15,56,222,209
	pxor	%xmm15,%xmm13
	psrad	$31,%xmm0
.byte	102,15,56,222,217
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm0
.byte	102,15,56,222,225
.byte	102,15,56,222,233
	pxor	%xmm0,%xmm15
	movups	(%rbp),%xmm0
.byte	102,15,56,222,241
.byte	102,15,56,222,249
	movups	16(%rbp),%xmm1

/* Final round: aesdeclast with memory operands 0,16,...,80(%rsp). */
	pxor	%xmm15,%xmm14
.byte	102,15,56,223,84,36,0
	psrad	$31,%xmm9
	paddq	%xmm15,%xmm15
.byte	102,15,56,223,92,36,16
.byte	102,15,56,223,100,36,32
	pand	%xmm8,%xmm9
	movq	%r10,%rax
.byte	102,15,56,223,108,36,48
.byte	102,15,56,223,116,36,64
.byte	102,15,56,223,124,36,80
	pxor	%xmm9,%xmm15

	leaq	96(%rsi),%rsi
	movups	%xmm2,-96(%rsi)
	movups	%xmm3,-80(%rsi)
	movups	%xmm4,-64(%rsi)
	movups	%xmm5,-48(%rsi)
	movups	%xmm6,-32(%rsi)
	movups	%xmm7,-16(%rsi)
	subq	$96,%rdx
	jnc	.Lxts_dec_grandloop

/* Turn the negative index back into a loop count and reset the key pointer. */
	movl	$16+96,%eax
	subl	%r10d,%eax
	movq	%rbp,%rcx
	shrl	$4,%eax

/* Fewer than six whole blocks remain: dispatch on the byte count. */
.Lxts_dec_short:

	movl	%eax,%r10d
	pxor	%xmm0,%xmm10
	pxor	%xmm0,%xmm11
	addq	$96,%rdx
	jz	.Lxts_dec_done

	pxor	%xmm0,%xmm12
	cmpq	$0x20,%rdx
	jb	.Lxts_dec_one
	pxor	%xmm0,%xmm13
	je	.Lxts_dec_two

	pxor	%xmm0,%xmm14
	cmpq	$0x40,%rdx
	jb	.Lxts_dec_three
	je	.Lxts_dec_four

/* Five blocks via the six-block helper. */
	movdqu	(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	pxor	%xmm10,%xmm2
	movdqu	48(%rdi),%xmm5
	pxor	%xmm11,%xmm3
	movdqu	64(%rdi),%xmm6
	leaq	80(%rdi),%rdi
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm6

	call	_aesni_decrypt6

	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	xorps	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	xorps	%xmm14,%xmm6
	movdqu	%xmm4,32(%rsi)
	pxor	%xmm14,%xmm14
	movdqu	%xmm5,48(%rsi)
	pcmpgtd	%xmm15,%xmm14
	movdqu	%xmm6,64(%rsi)
	leaq	80(%rsi),%rsi
	pshufd	$0x13,%xmm14,%xmm11
	andq	$15,%r9
	jz	.Lxts_dec_ret

/* Partial tail follows: %xmm10 = current tweak, %xmm11 = the one after it. */
	movdqa	%xmm15,%xmm10
	paddq	%xmm15,%xmm15
	pand	%xmm8,%xmm11
	pxor	%xmm15,%xmm11
	jmp	.Lxts_dec_done2

/* One block.  %xmm10/%xmm11 are left holding the next two tweaks. */
.align	16
.Lxts_dec_one:
	movups	(%rdi),%xmm2
	leaq	16(%rdi),%rdi
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_12:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_12
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movdqa	%xmm11,%xmm10
	movups	%xmm2,(%rsi)
	movdqa	%xmm12,%xmm11
	leaq	16(%rsi),%rsi
	jmp	.Lxts_dec_done

/* Two blocks. */
.align	16
.Lxts_dec_two:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	leaq	32(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3

	call	_aesni_decrypt2

	xorps	%xmm10,%xmm2
	movdqa	%xmm12,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm13,%xmm11
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	leaq	32(%rsi),%rsi
	jmp	.Lxts_dec_done

/* Three blocks. */
.align	16
.Lxts_dec_three:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	leaq	48(%rdi),%rdi
	xorps	%xmm10,%xmm2
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4

	call	_aesni_decrypt3

	xorps	%xmm10,%xmm2
	movdqa	%xmm13,%xmm10
	xorps	%xmm11,%xmm3
	movdqa	%xmm14,%xmm11
	xorps	%xmm12,%xmm4
	movups	%xmm2,(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	leaq	48(%rsi),%rsi
	jmp	.Lxts_dec_done

/* Four blocks. */
.align	16
.Lxts_dec_four:
	movups	(%rdi),%xmm2
	movups	16(%rdi),%xmm3
	movups	32(%rdi),%xmm4
	xorps	%xmm10,%xmm2
	movups	48(%rdi),%xmm5
	leaq	64(%rdi),%rdi
	xorps	%xmm11,%xmm3
	xorps	%xmm12,%xmm4
	xorps	%xmm13,%xmm5

	call	_aesni_decrypt4

	pxor	%xmm10,%xmm2
	movdqa	%xmm14,%xmm10
	pxor	%xmm11,%xmm3
	movdqa	%xmm15,%xmm11
	pxor	%xmm12,%xmm4
	movdqu	%xmm2,(%rsi)
	pxor	%xmm13,%xmm5
	movdqu	%xmm3,16(%rsi)
	movdqu	%xmm4,32(%rsi)
	movdqu	%xmm5,48(%rsi)
	leaq	64(%rsi),%rsi
	jmp	.Lxts_dec_done

/* Whole blocks done; a non-zero (length & 15) selects the steal path. */
.align	16
.Lxts_dec_done:
	andq	$15,%r9
	jz	.Lxts_dec_ret
.Lxts_dec_done2:
	movq	%r9,%rdx
	movq	%rbp,%rcx
	movl	%r10d,%eax

/* Decrypt the held-back full block with the later tweak (%xmm11). */
	movups	(%rdi),%xmm2
	xorps	%xmm11,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_13:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_13
.byte	102,15,56,223,209
	xorps	%xmm11,%xmm2
	movups	%xmm2,(%rsi)

/*
 * For each trailing byte: move the byte just written at (%rsi) out to
 * 16(%rsi) and replace it with the input byte from 16(%rdi).
 */
.Lxts_dec_steal:
	movzbl	16(%rdi),%eax
	movzbl	(%rsi),%ecx
	leaq	1(%rdi),%rdi
	movb	%al,(%rsi)
	movb	%cl,16(%rsi)
	leaq	1(%rsi),%rsi
	subq	$1,%rdx
	jnz	.Lxts_dec_steal

/* Decrypt the patched block at (%rsi) with the earlier tweak (%xmm10). */
	subq	%r9,%rsi
	movq	%rbp,%rcx
	movl	%r10d,%eax

	movups	(%rsi),%xmm2
	xorps	%xmm10,%xmm2
	movups	(%rcx),%xmm0
	movups	16(%rcx),%xmm1
	leaq	32(%rcx),%rcx
	xorps	%xmm0,%xmm2
.Loop_dec1_14:
.byte	102,15,56,222,209
	decl	%eax
	movups	(%rcx),%xmm1
	leaq	16(%rcx),%rcx
	jnz	.Loop_dec1_14
.byte	102,15,56,223,209
	xorps	%xmm10,%xmm2
	movups	%xmm2,(%rsi)

/* Exit: clear %xmm0-%xmm15 and 0..111(%rsp), then restore %rbp and %rsp. */
.Lxts_dec_ret:
	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	movaps	%xmm0,0(%rsp)
	pxor	%xmm8,%xmm8
	movaps	%xmm0,16(%rsp)
	pxor	%xmm9,%xmm9
	movaps	%xmm0,32(%rsp)
	pxor	%xmm10,%xmm10
	movaps	%xmm0,48(%rsp)
	pxor	%xmm11,%xmm11
	movaps	%xmm0,64(%rsp)
	pxor	%xmm12,%xmm12
	movaps	%xmm0,80(%rsp)
	pxor	%xmm13,%xmm13
	movaps	%xmm0,96(%rsp)
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	movq	-8(%r11),%rbp
	leaq	(%r11),%rsp
.Lxts_dec_epilogue:
	.byte	0xf3,0xc3
.size	aesni_xts_decrypt,.-aesni_xts_decrypt

/*
 * aesni_ocb_encrypt
 *
 * Registers and stack slots as consumed by the code below:
 *   %rdi      input pointer           %rsi      output pointer
 *   %rdx      number of 16-byte blocks
 *   %rcx      key schedule (copied to %r11; loop count at offset 240)
 *   %r8       block counter; bsf of it, times 16, indexes the table at %rbx
 *   %r9       pointer to a 16-byte value loaded into %xmm15 and written
 *             back on exit (presumably the running OCB offset)
 *   8(entry %rsp)   -> %rbx: table of 16-byte entries
 *   16(entry %rsp)  -> %rbp: 16-byte accumulator kept in %xmm8; every
 *             input block is XORed into it and it is written back on exit
 *             (presumably the OCB checksum)
 *
 * Register state kept for the helpers:
 *   %xmm9   first round key XOR last round key
 *   %xmm15  current offset XOR last round key
 *   %xmm10  table entry 0, loaded from (%rbx)
 *   %r10    48 - 16*rounds, the starting value of the round index in %rax
 *
 * %rbx, %rbp, %r12, %r13 and %r14 are saved on entry and restored on exit.
 */
.globl	aesni_ocb_encrypt
.hidden	aesni_ocb_encrypt
.type	aesni_ocb_encrypt,@function
.align	32
aesni_ocb_encrypt:
	leaq	(%rsp),%rax
	pushq	%rbx
	pushq	%rbp
	pushq	%r12
	pushq	%r13
	pushq	%r14
	movq	8(%rax),%rbx
	movq	8+8(%rax),%rbp

	movl	240(%rcx),%r10d
	movq	%rcx,%r11
	shll	$4,%r10d
	movups	(%rcx),%xmm9
	movups	16(%rcx,%r10,1),%xmm1

	movdqu	(%r9),%xmm15
	pxor	%xmm1,%xmm9
	pxor	%xmm1,%xmm15

	movl	$16+32,%eax
	leaq	32(%r11,%r10,1),%rcx
	movups	16(%r11),%xmm1
	subq	%r10,%rax
	movq	%rax,%r10

	movdqu	(%rbx),%xmm10
	movdqu	(%rbp),%xmm8

/* If the counter in %r8 is even, handle one block first so it becomes odd. */
	testq	$1,%r8
	jnz	.Locb_enc_odd

	bsfq	%r8,%r12
	addq	$1,%r8
	shlq	$4,%r12
	movdqu	(%rbx,%r12,1),%xmm7
	movdqu	(%rdi),%xmm2
	leaq	16(%rdi),%rdi

	call	__ocb_encrypt1

	movdqa	%xmm7,%xmm15
	movups	%xmm2,(%rsi)
	leaq	16(%rsi),%rsi
	subq	$1,%rdx
	jz	.Locb_enc_done

/* Precompute table offsets for counters %r8+1, %r8+3 and %r8+5. */
.Locb_enc_odd:
	leaq	1(%r8),%r12
	leaq	3(%r8),%r13
	leaq	5(%r8),%r14
	leaq	6(%r8),%r8
	bsfq	%r12,%r12
	bsfq	%r13,%r13
	bsfq	%r14,%r14
	shlq	$4,%r12
	shlq	$4,%r13
	shlq	$4,%r14

	subq	$6,%rdx
	jc	.Locb_enc_short
	jmp	.Locb_enc_grandloop

/* Main loop: six blocks per iteration. */
.align	32
.Locb_enc_grandloop:
	movdqu	0(%rdi),%xmm2
	movdqu	16(%rdi),%xmm3
	movdqu	32(%rdi),%xmm4
	movdqu	48(%rdi),%xmm5
	movdqu	64(%rdi),%xmm6
	movdqu	80(%rdi),%xmm7
	leaq	96(%rdi),%rdi

	call	__ocb_encrypt6

	movups	%xmm2,0(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)
	movups	%xmm7,80(%rsi)
	leaq	96(%rsi),%rsi
	subq	$6,%rdx
	jnc	.Locb_enc_grandloop

/* One to five blocks remain (or none). */
.Locb_enc_short:
	addq	$6,%rdx
	jz	.Locb_enc_done

	movdqu	0(%rdi),%xmm2
	cmpq	$2,%rdx
	jb	.Locb_enc_one
	movdqu	16(%rdi),%xmm3
	je	.Locb_enc_two

	movdqu	32(%rdi),%xmm4
	cmpq	$4,%rdx
	jb	.Locb_enc_three
	movdqu	48(%rdi),%xmm5
	je	.Locb_enc_four

/* Five blocks: six-block helper with %xmm7 zeroed; offset taken from %xmm14. */
	movdqu	64(%rdi),%xmm6
	pxor	%xmm7,%xmm7

	call	__ocb_encrypt6

	movdqa	%xmm14,%xmm15
	movups	%xmm2,0(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)
	movups	%xmm6,64(%rsi)

	jmp	.Locb_enc_done

/* One block, using table entry 0 (%xmm10). */
.align	16
.Locb_enc_one:
	movdqa	%xmm10,%xmm7

	call	__ocb_encrypt1

	movdqa	%xmm7,%xmm15
	movups	%xmm2,0(%rsi)
	jmp	.Locb_enc_done

/* Two blocks: four-block helper with %xmm4/%xmm5 zeroed. */
.align	16
.Locb_enc_two:
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5

	call	__ocb_encrypt4

	movdqa	%xmm11,%xmm15
	movups	%xmm2,0(%rsi)
	movups	%xmm3,16(%rsi)

	jmp	.Locb_enc_done

/* Three blocks: four-block helper with %xmm5 zeroed. */
.align	16
.Locb_enc_three:
	pxor	%xmm5,%xmm5

	call	__ocb_encrypt4

	movdqa	%xmm12,%xmm15
	movups	%xmm2,0(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)

	jmp	.Locb_enc_done

/* Four blocks. */
.align	16
.Locb_enc_four:
	call	__ocb_encrypt4

	movdqa	%xmm13,%xmm15
	movups	%xmm2,0(%rsi)
	movups	%xmm3,16(%rsi)
	movups	%xmm4,32(%rsi)
	movups	%xmm5,48(%rsi)

/*
 * Write back the accumulator and the offset, clear the XMM registers and
 * restore the saved registers.
 * NOTE(review): the pxor below presumably strips the last round key that
 * was folded into %xmm15 on entry; it relies on %xmm0 having been loaded
 * by a helper round loop.  With an odd %r8 and zero blocks this point is
 * reached without any helper call - confirm callers never do that.
 */
.Locb_enc_done:
	pxor	%xmm0,%xmm15
	movdqu	%xmm8,(%rbp)
	movdqu	%xmm15,(%r9)

	xorps	%xmm0,%xmm0
	pxor	%xmm1,%xmm1
	pxor	%xmm2,%xmm2
	pxor	%xmm3,%xmm3
	pxor	%xmm4,%xmm4
	pxor	%xmm5,%xmm5
	pxor	%xmm6,%xmm6
	pxor	%xmm7,%xmm7
	pxor	%xmm8,%xmm8
	pxor	%xmm9,%xmm9
	pxor	%xmm10,%xmm10
	pxor	%xmm11,%xmm11
	pxor	%xmm12,%xmm12
	pxor	%xmm13,%xmm13
	pxor	%xmm14,%xmm14
	pxor	%xmm15,%xmm15
	leaq	40(%rsp),%rax
	movq	-40(%rax),%r14
	movq	-32(%rax),%r13
	movq	-24(%rax),%r12
	movq	-16(%rax),%rbp
	movq	-8(%rax),%rbx
	leaq	(%rax),%rsp
.Locb_enc_epilogue:
	.byte	0xf3,0xc3
.size	aesni_ocb_encrypt,.-aesni_ocb_encrypt

/*
 * __ocb_encrypt6: encrypt the six blocks in %xmm2-%xmm7 in place.
 *
 * Reads %xmm9, %xmm10, %xmm15, %rbx, %r11, %rcx, %rax and the table
 * offsets in %r12-%r14.  XORs each input block into %xmm8.  Builds six
 * chained offsets in %xmm10-%xmm15 from table entries; they are XORed
 * into the blocks before the first round and, after a further XOR with
 * %xmm9, serve as the final-round operands.  On return %xmm15 holds the
 * sixth offset in that form, %xmm10 is reloaded from (%rbx), %rax is
 * reset from %r10, %r8 has advanced by 6 and %r12-%r14 hold the table
 * offsets for the next call.
 */
.type	__ocb_encrypt6,@function
.align	32
__ocb_encrypt6:
	pxor	%xmm9,%xmm15
	movdqu	(%rbx,%r12,1),%xmm11
	movdqa	%xmm10,%xmm12
	movdqu	(%rbx,%r13,1),%xmm13
	movdqa	%xmm10,%xmm14
	pxor	%xmm15,%xmm10
	movdqu	(%rbx,%r14,1),%xmm15
	pxor	%xmm10,%xmm11
	pxor	%xmm2,%xmm8
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm12
	pxor	%xmm3,%xmm8
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm13
	pxor	%xmm4,%xmm8
	pxor	%xmm12,%xmm4
	pxor	%xmm13,%xmm14
	pxor	%xmm5,%xmm8
	pxor	%xmm13,%xmm5
	pxor	%xmm14,%xmm15
	pxor	%xmm6,%xmm8
	pxor	%xmm14,%xmm6
	pxor	%xmm7,%xmm8
	pxor	%xmm15,%xmm7
	movups	32(%r11),%xmm0

	leaq	1(%r8),%r12
	leaq	3(%r8),%r13
	leaq	5(%r8),%r14
	addq	$6,%r8
	pxor	%xmm9,%xmm10
	bsfq	%r12,%r12
	bsfq	%r13,%r13
	bsfq	%r14,%r14

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	pxor	%xmm9,%xmm11
	pxor	%xmm9,%xmm12
.byte	102,15,56,220,241
	pxor	%xmm9,%xmm13
	pxor	%xmm9,%xmm14
.byte	102,15,56,220,249
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm15

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	64(%r11),%xmm0
	shlq	$4,%r12
	shlq	$4,%r13
	jmp	.Locb_enc_loop6

/* Middle rounds: two round keys per pass until %rax reaches zero. */
.align	32
.Locb_enc_loop6:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
.byte	102,15,56,220,240
.byte	102,15,56,220,248
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Locb_enc_loop6

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
.byte	102,15,56,220,241
.byte	102,15,56,220,249
	movups	16(%r11),%xmm1
	shlq	$4,%r14

/* Final round: aesenclast of %xmm2-%xmm7 with %xmm10-%xmm15. */
.byte	102,65,15,56,221,210
	movdqu	(%rbx),%xmm10
	movq	%r10,%rax
.byte	102,65,15,56,221,219
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
.byte	102,65,15,56,221,246
.byte	102,65,15,56,221,255
	.byte	0xf3,0xc3
.size	__ocb_encrypt6,.-__ocb_encrypt6

/*
 * __ocb_encrypt4: encrypt the four blocks in %xmm2-%xmm5 in place.
 *
 * Same scheme as __ocb_encrypt6 with four chained offsets in
 * %xmm10-%xmm13, built from %xmm10 and the table entries at
 * (%rbx,%r12) and (%rbx,%r13).  Each input block is XORed into %xmm8.
 * %r8 and %r12-%r14 are not modified here and %xmm10 is not reloaded;
 * %rax is reset from %r10 before returning.
 */
.type	__ocb_encrypt4,@function
.align	32
__ocb_encrypt4:
	pxor	%xmm9,%xmm15
	movdqu	(%rbx,%r12,1),%xmm11
	movdqa	%xmm10,%xmm12
	movdqu	(%rbx,%r13,1),%xmm13
	pxor	%xmm15,%xmm10
	pxor	%xmm10,%xmm11
	pxor	%xmm2,%xmm8
	pxor	%xmm10,%xmm2
	pxor	%xmm11,%xmm12
	pxor	%xmm3,%xmm8
	pxor	%xmm11,%xmm3
	pxor	%xmm12,%xmm13
	pxor	%xmm4,%xmm8
	pxor	%xmm12,%xmm4
	pxor	%xmm5,%xmm8
	pxor	%xmm13,%xmm5
	movups	32(%r11),%xmm0

	pxor	%xmm9,%xmm10
	pxor	%xmm9,%xmm11
	pxor	%xmm9,%xmm12
	pxor	%xmm9,%xmm13

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	48(%r11),%xmm1

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	64(%r11),%xmm0
	jmp	.Locb_enc_loop4

/* Middle rounds: two round keys per pass until %rax reaches zero. */
.align	32
.Locb_enc_loop4:
.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
.byte	102,15,56,220,216
.byte	102,15,56,220,224
.byte	102,15,56,220,232
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Locb_enc_loop4

.byte	102,15,56,220,209
.byte	102,15,56,220,217
.byte	102,15,56,220,225
.byte	102,15,56,220,233
	movups	16(%r11),%xmm1
	movq	%r10,%rax

/* Final round: aesenclast of %xmm2-%xmm5 with %xmm10-%xmm13. */
.byte	102,65,15,56,221,210
.byte	102,65,15,56,221,219
.byte	102,65,15,56,221,228
.byte	102,65,15,56,221,237
	.byte	0xf3,0xc3
.size	__ocb_encrypt4,.-__ocb_encrypt4

/*
 * __ocb_encrypt1: encrypt the single block in %xmm2 in place.
 *
 * %xmm7 carries the table entry on entry.  It is combined with %xmm15
 * and %xmm9 to mask the input, and after a second XOR with %xmm9 it is
 * the final-round operand.  The caller copies it back into %xmm15.  The
 * input block is XORed into %xmm8.  %rax is reset from %r10 before
 * returning.
 */
.type	__ocb_encrypt1,@function
.align	32
__ocb_encrypt1:
	pxor	%xmm15,%xmm7
	pxor	%xmm9,%xmm7
	pxor	%xmm2,%xmm8
	pxor	%xmm7,%xmm2
	movups	32(%r11),%xmm0

.byte	102,15,56,220,209
	movups	48(%r11),%xmm1
	pxor	%xmm9,%xmm7

.byte	102,15,56,220,208
	movups	64(%r11),%xmm0
	jmp	.Locb_enc_loop1

/* Middle rounds: two round keys per pass until %rax reaches zero. */
.align	32
.Locb_enc_loop1:
.byte	102,15,56,220,209
	movups	(%rcx,%rax,1),%xmm1
	addq	$32,%rax

.byte	102,15,56,220,208
	movups	-16(%rcx,%rax,1),%xmm0
	jnz	.Locb_enc_loop1

.byte	102,15,56,220,209
	movups	16(%r11),%xmm1
	movq	%r10,%rax

/* Final round: aesenclast %xmm7,%xmm2. */
.byte	102,15,56,221,215
	.byte	0xf3,0xc3
.size	__ocb_encrypt1,.-__ocb_encrypt1

.globl	aesni_ocb_decrypt
.hidden	aesni_ocb_decrypt
2946.type aesni_ocb_decrypt,@function 2947.align 32 2948aesni_ocb_decrypt: 2949 leaq (%rsp),%rax 2950 pushq %rbx 2951 pushq %rbp 2952 pushq %r12 2953 pushq %r13 2954 pushq %r14 2955 movq 8(%rax),%rbx 2956 movq 8+8(%rax),%rbp 2957 2958 movl 240(%rcx),%r10d 2959 movq %rcx,%r11 2960 shll $4,%r10d 2961 movups (%rcx),%xmm9 2962 movups 16(%rcx,%r10,1),%xmm1 2963 2964 movdqu (%r9),%xmm15 2965 pxor %xmm1,%xmm9 2966 pxor %xmm1,%xmm15 2967 2968 movl $16+32,%eax 2969 leaq 32(%r11,%r10,1),%rcx 2970 movups 16(%r11),%xmm1 2971 subq %r10,%rax 2972 movq %rax,%r10 2973 2974 movdqu (%rbx),%xmm10 2975 movdqu (%rbp),%xmm8 2976 2977 testq $1,%r8 2978 jnz .Locb_dec_odd 2979 2980 bsfq %r8,%r12 2981 addq $1,%r8 2982 shlq $4,%r12 2983 movdqu (%rbx,%r12,1),%xmm7 2984 movdqu (%rdi),%xmm2 2985 leaq 16(%rdi),%rdi 2986 2987 call __ocb_decrypt1 2988 2989 movdqa %xmm7,%xmm15 2990 movups %xmm2,(%rsi) 2991 xorps %xmm2,%xmm8 2992 leaq 16(%rsi),%rsi 2993 subq $1,%rdx 2994 jz .Locb_dec_done 2995 2996.Locb_dec_odd: 2997 leaq 1(%r8),%r12 2998 leaq 3(%r8),%r13 2999 leaq 5(%r8),%r14 3000 leaq 6(%r8),%r8 3001 bsfq %r12,%r12 3002 bsfq %r13,%r13 3003 bsfq %r14,%r14 3004 shlq $4,%r12 3005 shlq $4,%r13 3006 shlq $4,%r14 3007 3008 subq $6,%rdx 3009 jc .Locb_dec_short 3010 jmp .Locb_dec_grandloop 3011 3012.align 32 3013.Locb_dec_grandloop: 3014 movdqu 0(%rdi),%xmm2 3015 movdqu 16(%rdi),%xmm3 3016 movdqu 32(%rdi),%xmm4 3017 movdqu 48(%rdi),%xmm5 3018 movdqu 64(%rdi),%xmm6 3019 movdqu 80(%rdi),%xmm7 3020 leaq 96(%rdi),%rdi 3021 3022 call __ocb_decrypt6 3023 3024 movups %xmm2,0(%rsi) 3025 pxor %xmm2,%xmm8 3026 movups %xmm3,16(%rsi) 3027 pxor %xmm3,%xmm8 3028 movups %xmm4,32(%rsi) 3029 pxor %xmm4,%xmm8 3030 movups %xmm5,48(%rsi) 3031 pxor %xmm5,%xmm8 3032 movups %xmm6,64(%rsi) 3033 pxor %xmm6,%xmm8 3034 movups %xmm7,80(%rsi) 3035 pxor %xmm7,%xmm8 3036 leaq 96(%rsi),%rsi 3037 subq $6,%rdx 3038 jnc .Locb_dec_grandloop 3039 3040.Locb_dec_short: 3041 addq $6,%rdx 3042 jz .Locb_dec_done 3043 3044 movdqu 0(%rdi),%xmm2 
3045 cmpq $2,%rdx 3046 jb .Locb_dec_one 3047 movdqu 16(%rdi),%xmm3 3048 je .Locb_dec_two 3049 3050 movdqu 32(%rdi),%xmm4 3051 cmpq $4,%rdx 3052 jb .Locb_dec_three 3053 movdqu 48(%rdi),%xmm5 3054 je .Locb_dec_four 3055 3056 movdqu 64(%rdi),%xmm6 3057 pxor %xmm7,%xmm7 3058 3059 call __ocb_decrypt6 3060 3061 movdqa %xmm14,%xmm15 3062 movups %xmm2,0(%rsi) 3063 pxor %xmm2,%xmm8 3064 movups %xmm3,16(%rsi) 3065 pxor %xmm3,%xmm8 3066 movups %xmm4,32(%rsi) 3067 pxor %xmm4,%xmm8 3068 movups %xmm5,48(%rsi) 3069 pxor %xmm5,%xmm8 3070 movups %xmm6,64(%rsi) 3071 pxor %xmm6,%xmm8 3072 3073 jmp .Locb_dec_done 3074 3075.align 16 3076.Locb_dec_one: 3077 movdqa %xmm10,%xmm7 3078 3079 call __ocb_decrypt1 3080 3081 movdqa %xmm7,%xmm15 3082 movups %xmm2,0(%rsi) 3083 xorps %xmm2,%xmm8 3084 jmp .Locb_dec_done 3085 3086.align 16 3087.Locb_dec_two: 3088 pxor %xmm4,%xmm4 3089 pxor %xmm5,%xmm5 3090 3091 call __ocb_decrypt4 3092 3093 movdqa %xmm11,%xmm15 3094 movups %xmm2,0(%rsi) 3095 xorps %xmm2,%xmm8 3096 movups %xmm3,16(%rsi) 3097 xorps %xmm3,%xmm8 3098 3099 jmp .Locb_dec_done 3100 3101.align 16 3102.Locb_dec_three: 3103 pxor %xmm5,%xmm5 3104 3105 call __ocb_decrypt4 3106 3107 movdqa %xmm12,%xmm15 3108 movups %xmm2,0(%rsi) 3109 xorps %xmm2,%xmm8 3110 movups %xmm3,16(%rsi) 3111 xorps %xmm3,%xmm8 3112 movups %xmm4,32(%rsi) 3113 xorps %xmm4,%xmm8 3114 3115 jmp .Locb_dec_done 3116 3117.align 16 3118.Locb_dec_four: 3119 call __ocb_decrypt4 3120 3121 movdqa %xmm13,%xmm15 3122 movups %xmm2,0(%rsi) 3123 pxor %xmm2,%xmm8 3124 movups %xmm3,16(%rsi) 3125 pxor %xmm3,%xmm8 3126 movups %xmm4,32(%rsi) 3127 pxor %xmm4,%xmm8 3128 movups %xmm5,48(%rsi) 3129 pxor %xmm5,%xmm8 3130 3131.Locb_dec_done: 3132 pxor %xmm0,%xmm15 3133 movdqu %xmm8,(%rbp) 3134 movdqu %xmm15,(%r9) 3135 3136 xorps %xmm0,%xmm0 3137 pxor %xmm1,%xmm1 3138 pxor %xmm2,%xmm2 3139 pxor %xmm3,%xmm3 3140 pxor %xmm4,%xmm4 3141 pxor %xmm5,%xmm5 3142 pxor %xmm6,%xmm6 3143 pxor %xmm7,%xmm7 3144 pxor %xmm8,%xmm8 3145 pxor %xmm9,%xmm9 3146 pxor 
%xmm10,%xmm10 3147 pxor %xmm11,%xmm11 3148 pxor %xmm12,%xmm12 3149 pxor %xmm13,%xmm13 3150 pxor %xmm14,%xmm14 3151 pxor %xmm15,%xmm15 3152 leaq 40(%rsp),%rax 3153 movq -40(%rax),%r14 3154 movq -32(%rax),%r13 3155 movq -24(%rax),%r12 3156 movq -16(%rax),%rbp 3157 movq -8(%rax),%rbx 3158 leaq (%rax),%rsp 3159.Locb_dec_epilogue: 3160 .byte 0xf3,0xc3 3161.size aesni_ocb_decrypt,.-aesni_ocb_decrypt 3162 3163.type __ocb_decrypt6,@function 3164.align 32 3165__ocb_decrypt6: 3166 pxor %xmm9,%xmm15 3167 movdqu (%rbx,%r12,1),%xmm11 3168 movdqa %xmm10,%xmm12 3169 movdqu (%rbx,%r13,1),%xmm13 3170 movdqa %xmm10,%xmm14 3171 pxor %xmm15,%xmm10 3172 movdqu (%rbx,%r14,1),%xmm15 3173 pxor %xmm10,%xmm11 3174 pxor %xmm10,%xmm2 3175 pxor %xmm11,%xmm12 3176 pxor %xmm11,%xmm3 3177 pxor %xmm12,%xmm13 3178 pxor %xmm12,%xmm4 3179 pxor %xmm13,%xmm14 3180 pxor %xmm13,%xmm5 3181 pxor %xmm14,%xmm15 3182 pxor %xmm14,%xmm6 3183 pxor %xmm15,%xmm7 3184 movups 32(%r11),%xmm0 3185 3186 leaq 1(%r8),%r12 3187 leaq 3(%r8),%r13 3188 leaq 5(%r8),%r14 3189 addq $6,%r8 3190 pxor %xmm9,%xmm10 3191 bsfq %r12,%r12 3192 bsfq %r13,%r13 3193 bsfq %r14,%r14 3194 3195.byte 102,15,56,222,209 3196.byte 102,15,56,222,217 3197.byte 102,15,56,222,225 3198.byte 102,15,56,222,233 3199 pxor %xmm9,%xmm11 3200 pxor %xmm9,%xmm12 3201.byte 102,15,56,222,241 3202 pxor %xmm9,%xmm13 3203 pxor %xmm9,%xmm14 3204.byte 102,15,56,222,249 3205 movups 48(%r11),%xmm1 3206 pxor %xmm9,%xmm15 3207 3208.byte 102,15,56,222,208 3209.byte 102,15,56,222,216 3210.byte 102,15,56,222,224 3211.byte 102,15,56,222,232 3212.byte 102,15,56,222,240 3213.byte 102,15,56,222,248 3214 movups 64(%r11),%xmm0 3215 shlq $4,%r12 3216 shlq $4,%r13 3217 jmp .Locb_dec_loop6 3218 3219.align 32 3220.Locb_dec_loop6: 3221.byte 102,15,56,222,209 3222.byte 102,15,56,222,217 3223.byte 102,15,56,222,225 3224.byte 102,15,56,222,233 3225.byte 102,15,56,222,241 3226.byte 102,15,56,222,249 3227 movups (%rcx,%rax,1),%xmm1 3228 addq $32,%rax 3229 3230.byte 102,15,56,222,208 
3231.byte 102,15,56,222,216 3232.byte 102,15,56,222,224 3233.byte 102,15,56,222,232 3234.byte 102,15,56,222,240 3235.byte 102,15,56,222,248 3236 movups -16(%rcx,%rax,1),%xmm0 3237 jnz .Locb_dec_loop6 3238 3239.byte 102,15,56,222,209 3240.byte 102,15,56,222,217 3241.byte 102,15,56,222,225 3242.byte 102,15,56,222,233 3243.byte 102,15,56,222,241 3244.byte 102,15,56,222,249 3245 movups 16(%r11),%xmm1 3246 shlq $4,%r14 3247 3248.byte 102,65,15,56,223,210 3249 movdqu (%rbx),%xmm10 3250 movq %r10,%rax 3251.byte 102,65,15,56,223,219 3252.byte 102,65,15,56,223,228 3253.byte 102,65,15,56,223,237 3254.byte 102,65,15,56,223,246 3255.byte 102,65,15,56,223,255 3256 .byte 0xf3,0xc3 3257.size __ocb_decrypt6,.-__ocb_decrypt6 3258 3259.type __ocb_decrypt4,@function 3260.align 32 3261__ocb_decrypt4: 3262 pxor %xmm9,%xmm15 3263 movdqu (%rbx,%r12,1),%xmm11 3264 movdqa %xmm10,%xmm12 3265 movdqu (%rbx,%r13,1),%xmm13 3266 pxor %xmm15,%xmm10 3267 pxor %xmm10,%xmm11 3268 pxor %xmm10,%xmm2 3269 pxor %xmm11,%xmm12 3270 pxor %xmm11,%xmm3 3271 pxor %xmm12,%xmm13 3272 pxor %xmm12,%xmm4 3273 pxor %xmm13,%xmm5 3274 movups 32(%r11),%xmm0 3275 3276 pxor %xmm9,%xmm10 3277 pxor %xmm9,%xmm11 3278 pxor %xmm9,%xmm12 3279 pxor %xmm9,%xmm13 3280 3281.byte 102,15,56,222,209 3282.byte 102,15,56,222,217 3283.byte 102,15,56,222,225 3284.byte 102,15,56,222,233 3285 movups 48(%r11),%xmm1 3286 3287.byte 102,15,56,222,208 3288.byte 102,15,56,222,216 3289.byte 102,15,56,222,224 3290.byte 102,15,56,222,232 3291 movups 64(%r11),%xmm0 3292 jmp .Locb_dec_loop4 3293 3294.align 32 3295.Locb_dec_loop4: 3296.byte 102,15,56,222,209 3297.byte 102,15,56,222,217 3298.byte 102,15,56,222,225 3299.byte 102,15,56,222,233 3300 movups (%rcx,%rax,1),%xmm1 3301 addq $32,%rax 3302 3303.byte 102,15,56,222,208 3304.byte 102,15,56,222,216 3305.byte 102,15,56,222,224 3306.byte 102,15,56,222,232 3307 movups -16(%rcx,%rax,1),%xmm0 3308 jnz .Locb_dec_loop4 3309 3310.byte 102,15,56,222,209 3311.byte 102,15,56,222,217 3312.byte 
102,15,56,222,225 3313.byte 102,15,56,222,233 3314 movups 16(%r11),%xmm1 3315 movq %r10,%rax 3316 3317.byte 102,65,15,56,223,210 3318.byte 102,65,15,56,223,219 3319.byte 102,65,15,56,223,228 3320.byte 102,65,15,56,223,237 3321 .byte 0xf3,0xc3 3322.size __ocb_decrypt4,.-__ocb_decrypt4 3323 3324.type __ocb_decrypt1,@function 3325.align 32 3326__ocb_decrypt1: 3327 pxor %xmm15,%xmm7 3328 pxor %xmm9,%xmm7 3329 pxor %xmm7,%xmm2 3330 movups 32(%r11),%xmm0 3331 3332.byte 102,15,56,222,209 3333 movups 48(%r11),%xmm1 3334 pxor %xmm9,%xmm7 3335 3336.byte 102,15,56,222,208 3337 movups 64(%r11),%xmm0 3338 jmp .Locb_dec_loop1 3339 3340.align 32 3341.Locb_dec_loop1: 3342.byte 102,15,56,222,209 3343 movups (%rcx,%rax,1),%xmm1 3344 addq $32,%rax 3345 3346.byte 102,15,56,222,208 3347 movups -16(%rcx,%rax,1),%xmm0 3348 jnz .Locb_dec_loop1 3349 3350.byte 102,15,56,222,209 3351 movups 16(%r11),%xmm1 3352 movq %r10,%rax 3353 3354.byte 102,15,56,223,215 3355 .byte 0xf3,0xc3 3356.size __ocb_decrypt1,.-__ocb_decrypt1 3357.globl aesni_cbc_encrypt 3358.hidden aesni_cbc_encrypt 3359.type aesni_cbc_encrypt,@function 3360.align 16 3361aesni_cbc_encrypt: 3362 testq %rdx,%rdx 3363 jz .Lcbc_ret 3364 3365 movl 240(%rcx),%r10d 3366 movq %rcx,%r11 3367 testl %r9d,%r9d 3368 jz .Lcbc_decrypt 3369 3370 movups (%r8),%xmm2 3371 movl %r10d,%eax 3372 cmpq $16,%rdx 3373 jb .Lcbc_enc_tail 3374 subq $16,%rdx 3375 jmp .Lcbc_enc_loop 3376.align 16 3377.Lcbc_enc_loop: 3378 movups (%rdi),%xmm3 3379 leaq 16(%rdi),%rdi 3380 3381 movups (%rcx),%xmm0 3382 movups 16(%rcx),%xmm1 3383 xorps %xmm0,%xmm3 3384 leaq 32(%rcx),%rcx 3385 xorps %xmm3,%xmm2 3386.Loop_enc1_15: 3387.byte 102,15,56,220,209 3388 decl %eax 3389 movups (%rcx),%xmm1 3390 leaq 16(%rcx),%rcx 3391 jnz .Loop_enc1_15 3392.byte 102,15,56,221,209 3393 movl %r10d,%eax 3394 movq %r11,%rcx 3395 movups %xmm2,0(%rsi) 3396 leaq 16(%rsi),%rsi 3397 subq $16,%rdx 3398 jnc .Lcbc_enc_loop 3399 addq $16,%rdx 3400 jnz .Lcbc_enc_tail 3401 pxor %xmm0,%xmm0 3402 pxor 
%xmm1,%xmm1 3403 movups %xmm2,(%r8) 3404 pxor %xmm2,%xmm2 3405 pxor %xmm3,%xmm3 3406 jmp .Lcbc_ret 3407 3408.Lcbc_enc_tail: 3409 movq %rdx,%rcx 3410 xchgq %rdi,%rsi 3411.long 0x9066A4F3 3412 movl $16,%ecx 3413 subq %rdx,%rcx 3414 xorl %eax,%eax 3415.long 0x9066AAF3 3416 leaq -16(%rdi),%rdi 3417 movl %r10d,%eax 3418 movq %rdi,%rsi 3419 movq %r11,%rcx 3420 xorq %rdx,%rdx 3421 jmp .Lcbc_enc_loop 3422 3423.align 16 3424.Lcbc_decrypt: 3425 cmpq $16,%rdx 3426 jne .Lcbc_decrypt_bulk 3427 3428 3429 3430 movdqu (%rdi),%xmm2 3431 movdqu (%r8),%xmm3 3432 movdqa %xmm2,%xmm4 3433 movups (%rcx),%xmm0 3434 movups 16(%rcx),%xmm1 3435 leaq 32(%rcx),%rcx 3436 xorps %xmm0,%xmm2 3437.Loop_dec1_16: 3438.byte 102,15,56,222,209 3439 decl %r10d 3440 movups (%rcx),%xmm1 3441 leaq 16(%rcx),%rcx 3442 jnz .Loop_dec1_16 3443.byte 102,15,56,223,209 3444 pxor %xmm0,%xmm0 3445 pxor %xmm1,%xmm1 3446 movdqu %xmm4,(%r8) 3447 xorps %xmm3,%xmm2 3448 pxor %xmm3,%xmm3 3449 movups %xmm2,(%rsi) 3450 pxor %xmm2,%xmm2 3451 jmp .Lcbc_ret 3452.align 16 3453.Lcbc_decrypt_bulk: 3454 leaq (%rsp),%r11 3455 pushq %rbp 3456 subq $16,%rsp 3457 andq $-16,%rsp 3458 movq %rcx,%rbp 3459 movups (%r8),%xmm10 3460 movl %r10d,%eax 3461 cmpq $0x50,%rdx 3462 jbe .Lcbc_dec_tail 3463 3464 movups (%rcx),%xmm0 3465 movdqu 0(%rdi),%xmm2 3466 movdqu 16(%rdi),%xmm3 3467 movdqa %xmm2,%xmm11 3468 movdqu 32(%rdi),%xmm4 3469 movdqa %xmm3,%xmm12 3470 movdqu 48(%rdi),%xmm5 3471 movdqa %xmm4,%xmm13 3472 movdqu 64(%rdi),%xmm6 3473 movdqa %xmm5,%xmm14 3474 movdqu 80(%rdi),%xmm7 3475 movdqa %xmm6,%xmm15 3476 movl OPENSSL_ia32cap_P+4(%rip),%r9d 3477 cmpq $0x70,%rdx 3478 jbe .Lcbc_dec_six_or_seven 3479 3480 andl $71303168,%r9d 3481 subq $0x50,%rdx 3482 cmpl $4194304,%r9d 3483 je .Lcbc_dec_loop6_enter 3484 subq $0x20,%rdx 3485 leaq 112(%rcx),%rcx 3486 jmp .Lcbc_dec_loop8_enter 3487.align 16 3488.Lcbc_dec_loop8: 3489 movups %xmm9,(%rsi) 3490 leaq 16(%rsi),%rsi 3491.Lcbc_dec_loop8_enter: 3492 movdqu 96(%rdi),%xmm8 3493 pxor %xmm0,%xmm2 3494 movdqu 
112(%rdi),%xmm9 3495 pxor %xmm0,%xmm3 3496 movups 16-112(%rcx),%xmm1 3497 pxor %xmm0,%xmm4 3498 movq $-1,%rbp 3499 cmpq $0x70,%rdx 3500 pxor %xmm0,%xmm5 3501 pxor %xmm0,%xmm6 3502 pxor %xmm0,%xmm7 3503 pxor %xmm0,%xmm8 3504 3505.byte 102,15,56,222,209 3506 pxor %xmm0,%xmm9 3507 movups 32-112(%rcx),%xmm0 3508.byte 102,15,56,222,217 3509.byte 102,15,56,222,225 3510.byte 102,15,56,222,233 3511.byte 102,15,56,222,241 3512.byte 102,15,56,222,249 3513.byte 102,68,15,56,222,193 3514 adcq $0,%rbp 3515 andq $128,%rbp 3516.byte 102,68,15,56,222,201 3517 addq %rdi,%rbp 3518 movups 48-112(%rcx),%xmm1 3519.byte 102,15,56,222,208 3520.byte 102,15,56,222,216 3521.byte 102,15,56,222,224 3522.byte 102,15,56,222,232 3523.byte 102,15,56,222,240 3524.byte 102,15,56,222,248 3525.byte 102,68,15,56,222,192 3526.byte 102,68,15,56,222,200 3527 movups 64-112(%rcx),%xmm0 3528 nop 3529.byte 102,15,56,222,209 3530.byte 102,15,56,222,217 3531.byte 102,15,56,222,225 3532.byte 102,15,56,222,233 3533.byte 102,15,56,222,241 3534.byte 102,15,56,222,249 3535.byte 102,68,15,56,222,193 3536.byte 102,68,15,56,222,201 3537 movups 80-112(%rcx),%xmm1 3538 nop 3539.byte 102,15,56,222,208 3540.byte 102,15,56,222,216 3541.byte 102,15,56,222,224 3542.byte 102,15,56,222,232 3543.byte 102,15,56,222,240 3544.byte 102,15,56,222,248 3545.byte 102,68,15,56,222,192 3546.byte 102,68,15,56,222,200 3547 movups 96-112(%rcx),%xmm0 3548 nop 3549.byte 102,15,56,222,209 3550.byte 102,15,56,222,217 3551.byte 102,15,56,222,225 3552.byte 102,15,56,222,233 3553.byte 102,15,56,222,241 3554.byte 102,15,56,222,249 3555.byte 102,68,15,56,222,193 3556.byte 102,68,15,56,222,201 3557 movups 112-112(%rcx),%xmm1 3558 nop 3559.byte 102,15,56,222,208 3560.byte 102,15,56,222,216 3561.byte 102,15,56,222,224 3562.byte 102,15,56,222,232 3563.byte 102,15,56,222,240 3564.byte 102,15,56,222,248 3565.byte 102,68,15,56,222,192 3566.byte 102,68,15,56,222,200 3567 movups 128-112(%rcx),%xmm0 3568 nop 3569.byte 102,15,56,222,209 3570.byte 
102,15,56,222,217 3571.byte 102,15,56,222,225 3572.byte 102,15,56,222,233 3573.byte 102,15,56,222,241 3574.byte 102,15,56,222,249 3575.byte 102,68,15,56,222,193 3576.byte 102,68,15,56,222,201 3577 movups 144-112(%rcx),%xmm1 3578 cmpl $11,%eax 3579.byte 102,15,56,222,208 3580.byte 102,15,56,222,216 3581.byte 102,15,56,222,224 3582.byte 102,15,56,222,232 3583.byte 102,15,56,222,240 3584.byte 102,15,56,222,248 3585.byte 102,68,15,56,222,192 3586.byte 102,68,15,56,222,200 3587 movups 160-112(%rcx),%xmm0 3588 jb .Lcbc_dec_done 3589.byte 102,15,56,222,209 3590.byte 102,15,56,222,217 3591.byte 102,15,56,222,225 3592.byte 102,15,56,222,233 3593.byte 102,15,56,222,241 3594.byte 102,15,56,222,249 3595.byte 102,68,15,56,222,193 3596.byte 102,68,15,56,222,201 3597 movups 176-112(%rcx),%xmm1 3598 nop 3599.byte 102,15,56,222,208 3600.byte 102,15,56,222,216 3601.byte 102,15,56,222,224 3602.byte 102,15,56,222,232 3603.byte 102,15,56,222,240 3604.byte 102,15,56,222,248 3605.byte 102,68,15,56,222,192 3606.byte 102,68,15,56,222,200 3607 movups 192-112(%rcx),%xmm0 3608 je .Lcbc_dec_done 3609.byte 102,15,56,222,209 3610.byte 102,15,56,222,217 3611.byte 102,15,56,222,225 3612.byte 102,15,56,222,233 3613.byte 102,15,56,222,241 3614.byte 102,15,56,222,249 3615.byte 102,68,15,56,222,193 3616.byte 102,68,15,56,222,201 3617 movups 208-112(%rcx),%xmm1 3618 nop 3619.byte 102,15,56,222,208 3620.byte 102,15,56,222,216 3621.byte 102,15,56,222,224 3622.byte 102,15,56,222,232 3623.byte 102,15,56,222,240 3624.byte 102,15,56,222,248 3625.byte 102,68,15,56,222,192 3626.byte 102,68,15,56,222,200 3627 movups 224-112(%rcx),%xmm0 3628 jmp .Lcbc_dec_done 3629.align 16 3630.Lcbc_dec_done: 3631.byte 102,15,56,222,209 3632.byte 102,15,56,222,217 3633 pxor %xmm0,%xmm10 3634 pxor %xmm0,%xmm11 3635.byte 102,15,56,222,225 3636.byte 102,15,56,222,233 3637 pxor %xmm0,%xmm12 3638 pxor %xmm0,%xmm13 3639.byte 102,15,56,222,241 3640.byte 102,15,56,222,249 3641 pxor %xmm0,%xmm14 3642 pxor %xmm0,%xmm15 3643.byte 
102,68,15,56,222,193 3644.byte 102,68,15,56,222,201 3645 movdqu 80(%rdi),%xmm1 3646 3647.byte 102,65,15,56,223,210 3648 movdqu 96(%rdi),%xmm10 3649 pxor %xmm0,%xmm1 3650.byte 102,65,15,56,223,219 3651 pxor %xmm0,%xmm10 3652 movdqu 112(%rdi),%xmm0 3653.byte 102,65,15,56,223,228 3654 leaq 128(%rdi),%rdi 3655 movdqu 0(%rbp),%xmm11 3656.byte 102,65,15,56,223,237 3657.byte 102,65,15,56,223,246 3658 movdqu 16(%rbp),%xmm12 3659 movdqu 32(%rbp),%xmm13 3660.byte 102,65,15,56,223,255 3661.byte 102,68,15,56,223,193 3662 movdqu 48(%rbp),%xmm14 3663 movdqu 64(%rbp),%xmm15 3664.byte 102,69,15,56,223,202 3665 movdqa %xmm0,%xmm10 3666 movdqu 80(%rbp),%xmm1 3667 movups -112(%rcx),%xmm0 3668 3669 movups %xmm2,(%rsi) 3670 movdqa %xmm11,%xmm2 3671 movups %xmm3,16(%rsi) 3672 movdqa %xmm12,%xmm3 3673 movups %xmm4,32(%rsi) 3674 movdqa %xmm13,%xmm4 3675 movups %xmm5,48(%rsi) 3676 movdqa %xmm14,%xmm5 3677 movups %xmm6,64(%rsi) 3678 movdqa %xmm15,%xmm6 3679 movups %xmm7,80(%rsi) 3680 movdqa %xmm1,%xmm7 3681 movups %xmm8,96(%rsi) 3682 leaq 112(%rsi),%rsi 3683 3684 subq $0x80,%rdx 3685 ja .Lcbc_dec_loop8 3686 3687 movaps %xmm9,%xmm2 3688 leaq -112(%rcx),%rcx 3689 addq $0x70,%rdx 3690 jle .Lcbc_dec_clear_tail_collected 3691 movups %xmm9,(%rsi) 3692 leaq 16(%rsi),%rsi 3693 cmpq $0x50,%rdx 3694 jbe .Lcbc_dec_tail 3695 3696 movaps %xmm11,%xmm2 3697.Lcbc_dec_six_or_seven: 3698 cmpq $0x60,%rdx 3699 ja .Lcbc_dec_seven 3700 3701 movaps %xmm7,%xmm8 3702 call _aesni_decrypt6 3703 pxor %xmm10,%xmm2 3704 movaps %xmm8,%xmm10 3705 pxor %xmm11,%xmm3 3706 movdqu %xmm2,(%rsi) 3707 pxor %xmm12,%xmm4 3708 movdqu %xmm3,16(%rsi) 3709 pxor %xmm3,%xmm3 3710 pxor %xmm13,%xmm5 3711 movdqu %xmm4,32(%rsi) 3712 pxor %xmm4,%xmm4 3713 pxor %xmm14,%xmm6 3714 movdqu %xmm5,48(%rsi) 3715 pxor %xmm5,%xmm5 3716 pxor %xmm15,%xmm7 3717 movdqu %xmm6,64(%rsi) 3718 pxor %xmm6,%xmm6 3719 leaq 80(%rsi),%rsi 3720 movdqa %xmm7,%xmm2 3721 pxor %xmm7,%xmm7 3722 jmp .Lcbc_dec_tail_collected 3723 3724.align 16 3725.Lcbc_dec_seven: 3726 
movups 96(%rdi),%xmm8 3727 xorps %xmm9,%xmm9 3728 call _aesni_decrypt8 3729 movups 80(%rdi),%xmm9 3730 pxor %xmm10,%xmm2 3731 movups 96(%rdi),%xmm10 3732 pxor %xmm11,%xmm3 3733 movdqu %xmm2,(%rsi) 3734 pxor %xmm12,%xmm4 3735 movdqu %xmm3,16(%rsi) 3736 pxor %xmm3,%xmm3 3737 pxor %xmm13,%xmm5 3738 movdqu %xmm4,32(%rsi) 3739 pxor %xmm4,%xmm4 3740 pxor %xmm14,%xmm6 3741 movdqu %xmm5,48(%rsi) 3742 pxor %xmm5,%xmm5 3743 pxor %xmm15,%xmm7 3744 movdqu %xmm6,64(%rsi) 3745 pxor %xmm6,%xmm6 3746 pxor %xmm9,%xmm8 3747 movdqu %xmm7,80(%rsi) 3748 pxor %xmm7,%xmm7 3749 leaq 96(%rsi),%rsi 3750 movdqa %xmm8,%xmm2 3751 pxor %xmm8,%xmm8 3752 pxor %xmm9,%xmm9 3753 jmp .Lcbc_dec_tail_collected 3754 3755.align 16 3756.Lcbc_dec_loop6: 3757 movups %xmm7,(%rsi) 3758 leaq 16(%rsi),%rsi 3759 movdqu 0(%rdi),%xmm2 3760 movdqu 16(%rdi),%xmm3 3761 movdqa %xmm2,%xmm11 3762 movdqu 32(%rdi),%xmm4 3763 movdqa %xmm3,%xmm12 3764 movdqu 48(%rdi),%xmm5 3765 movdqa %xmm4,%xmm13 3766 movdqu 64(%rdi),%xmm6 3767 movdqa %xmm5,%xmm14 3768 movdqu 80(%rdi),%xmm7 3769 movdqa %xmm6,%xmm15 3770.Lcbc_dec_loop6_enter: 3771 leaq 96(%rdi),%rdi 3772 movdqa %xmm7,%xmm8 3773 3774 call _aesni_decrypt6 3775 3776 pxor %xmm10,%xmm2 3777 movdqa %xmm8,%xmm10 3778 pxor %xmm11,%xmm3 3779 movdqu %xmm2,(%rsi) 3780 pxor %xmm12,%xmm4 3781 movdqu %xmm3,16(%rsi) 3782 pxor %xmm13,%xmm5 3783 movdqu %xmm4,32(%rsi) 3784 pxor %xmm14,%xmm6 3785 movq %rbp,%rcx 3786 movdqu %xmm5,48(%rsi) 3787 pxor %xmm15,%xmm7 3788 movl %r10d,%eax 3789 movdqu %xmm6,64(%rsi) 3790 leaq 80(%rsi),%rsi 3791 subq $0x60,%rdx 3792 ja .Lcbc_dec_loop6 3793 3794 movdqa %xmm7,%xmm2 3795 addq $0x50,%rdx 3796 jle .Lcbc_dec_clear_tail_collected 3797 movups %xmm7,(%rsi) 3798 leaq 16(%rsi),%rsi 3799 3800.Lcbc_dec_tail: 3801 movups (%rdi),%xmm2 3802 subq $0x10,%rdx 3803 jbe .Lcbc_dec_one 3804 3805 movups 16(%rdi),%xmm3 3806 movaps %xmm2,%xmm11 3807 subq $0x10,%rdx 3808 jbe .Lcbc_dec_two 3809 3810 movups 32(%rdi),%xmm4 3811 movaps %xmm3,%xmm12 3812 subq $0x10,%rdx 3813 jbe 
.Lcbc_dec_three 3814 3815 movups 48(%rdi),%xmm5 3816 movaps %xmm4,%xmm13 3817 subq $0x10,%rdx 3818 jbe .Lcbc_dec_four 3819 3820 movups 64(%rdi),%xmm6 3821 movaps %xmm5,%xmm14 3822 movaps %xmm6,%xmm15 3823 xorps %xmm7,%xmm7 3824 call _aesni_decrypt6 3825 pxor %xmm10,%xmm2 3826 movaps %xmm15,%xmm10 3827 pxor %xmm11,%xmm3 3828 movdqu %xmm2,(%rsi) 3829 pxor %xmm12,%xmm4 3830 movdqu %xmm3,16(%rsi) 3831 pxor %xmm3,%xmm3 3832 pxor %xmm13,%xmm5 3833 movdqu %xmm4,32(%rsi) 3834 pxor %xmm4,%xmm4 3835 pxor %xmm14,%xmm6 3836 movdqu %xmm5,48(%rsi) 3837 pxor %xmm5,%xmm5 3838 leaq 64(%rsi),%rsi 3839 movdqa %xmm6,%xmm2 3840 pxor %xmm6,%xmm6 3841 pxor %xmm7,%xmm7 3842 subq $0x10,%rdx 3843 jmp .Lcbc_dec_tail_collected 3844 3845.align 16 3846.Lcbc_dec_one: 3847 movaps %xmm2,%xmm11 3848 movups (%rcx),%xmm0 3849 movups 16(%rcx),%xmm1 3850 leaq 32(%rcx),%rcx 3851 xorps %xmm0,%xmm2 3852.Loop_dec1_17: 3853.byte 102,15,56,222,209 3854 decl %eax 3855 movups (%rcx),%xmm1 3856 leaq 16(%rcx),%rcx 3857 jnz .Loop_dec1_17 3858.byte 102,15,56,223,209 3859 xorps %xmm10,%xmm2 3860 movaps %xmm11,%xmm10 3861 jmp .Lcbc_dec_tail_collected 3862.align 16 3863.Lcbc_dec_two: 3864 movaps %xmm3,%xmm12 3865 call _aesni_decrypt2 3866 pxor %xmm10,%xmm2 3867 movaps %xmm12,%xmm10 3868 pxor %xmm11,%xmm3 3869 movdqu %xmm2,(%rsi) 3870 movdqa %xmm3,%xmm2 3871 pxor %xmm3,%xmm3 3872 leaq 16(%rsi),%rsi 3873 jmp .Lcbc_dec_tail_collected 3874.align 16 3875.Lcbc_dec_three: 3876 movaps %xmm4,%xmm13 3877 call _aesni_decrypt3 3878 pxor %xmm10,%xmm2 3879 movaps %xmm13,%xmm10 3880 pxor %xmm11,%xmm3 3881 movdqu %xmm2,(%rsi) 3882 pxor %xmm12,%xmm4 3883 movdqu %xmm3,16(%rsi) 3884 pxor %xmm3,%xmm3 3885 movdqa %xmm4,%xmm2 3886 pxor %xmm4,%xmm4 3887 leaq 32(%rsi),%rsi 3888 jmp .Lcbc_dec_tail_collected 3889.align 16 3890.Lcbc_dec_four: 3891 movaps %xmm5,%xmm14 3892 call _aesni_decrypt4 3893 pxor %xmm10,%xmm2 3894 movaps %xmm14,%xmm10 3895 pxor %xmm11,%xmm3 3896 movdqu %xmm2,(%rsi) 3897 pxor %xmm12,%xmm4 3898 movdqu %xmm3,16(%rsi) 3899 
pxor %xmm3,%xmm3 3900 pxor %xmm13,%xmm5 3901 movdqu %xmm4,32(%rsi) 3902 pxor %xmm4,%xmm4 3903 movdqa %xmm5,%xmm2 3904 pxor %xmm5,%xmm5 3905 leaq 48(%rsi),%rsi 3906 jmp .Lcbc_dec_tail_collected 3907 3908.align 16 3909.Lcbc_dec_clear_tail_collected: 3910 pxor %xmm3,%xmm3 3911 pxor %xmm4,%xmm4 3912 pxor %xmm5,%xmm5 3913 pxor %xmm6,%xmm6 3914 pxor %xmm7,%xmm7 3915 pxor %xmm8,%xmm8 3916 pxor %xmm9,%xmm9 3917.Lcbc_dec_tail_collected: 3918 movups %xmm10,(%r8) 3919 andq $15,%rdx 3920 jnz .Lcbc_dec_tail_partial 3921 movups %xmm2,(%rsi) 3922 pxor %xmm2,%xmm2 3923 jmp .Lcbc_dec_ret 3924.align 16 3925.Lcbc_dec_tail_partial: 3926 movaps %xmm2,(%rsp) 3927 pxor %xmm2,%xmm2 3928 movq $16,%rcx 3929 movq %rsi,%rdi 3930 subq %rdx,%rcx 3931 leaq (%rsp),%rsi 3932.long 0x9066A4F3 3933 movdqa %xmm2,(%rsp) 3934 3935.Lcbc_dec_ret: 3936 xorps %xmm0,%xmm0 3937 pxor %xmm1,%xmm1 3938 movq -8(%r11),%rbp 3939 leaq (%r11),%rsp 3940.Lcbc_ret: 3941 .byte 0xf3,0xc3 3942.size aesni_cbc_encrypt,.-aesni_cbc_encrypt 3943.globl aesni_set_decrypt_key 3944.hidden aesni_set_decrypt_key 3945.type aesni_set_decrypt_key,@function 3946.align 16 3947aesni_set_decrypt_key: 3948.byte 0x48,0x83,0xEC,0x08 3949 call __aesni_set_encrypt_key 3950 shll $4,%esi 3951 testl %eax,%eax 3952 jnz .Ldec_key_ret 3953 leaq 16(%rdx,%rsi,1),%rdi 3954 3955 movups (%rdx),%xmm0 3956 movups (%rdi),%xmm1 3957 movups %xmm0,(%rdi) 3958 movups %xmm1,(%rdx) 3959 leaq 16(%rdx),%rdx 3960 leaq -16(%rdi),%rdi 3961 3962.Ldec_key_inverse: 3963 movups (%rdx),%xmm0 3964 movups (%rdi),%xmm1 3965.byte 102,15,56,219,192 3966.byte 102,15,56,219,201 3967 leaq 16(%rdx),%rdx 3968 leaq -16(%rdi),%rdi 3969 movups %xmm0,16(%rdi) 3970 movups %xmm1,-16(%rdx) 3971 cmpq %rdx,%rdi 3972 ja .Ldec_key_inverse 3973 3974 movups (%rdx),%xmm0 3975.byte 102,15,56,219,192 3976 pxor %xmm1,%xmm1 3977 movups %xmm0,(%rdi) 3978 pxor %xmm0,%xmm0 3979.Ldec_key_ret: 3980 addq $8,%rsp 3981 .byte 0xf3,0xc3 3982.LSEH_end_set_decrypt_key: 3983.size 
aesni_set_decrypt_key,.-aesni_set_decrypt_key 3984.globl aesni_set_encrypt_key 3985.hidden aesni_set_encrypt_key 3986.type aesni_set_encrypt_key,@function 3987.align 16 3988aesni_set_encrypt_key: 3989__aesni_set_encrypt_key: 3990.byte 0x48,0x83,0xEC,0x08 3991 movq $-1,%rax 3992 testq %rdi,%rdi 3993 jz .Lenc_key_ret 3994 testq %rdx,%rdx 3995 jz .Lenc_key_ret 3996 3997 movl $268437504,%r10d 3998 movups (%rdi),%xmm0 3999 xorps %xmm4,%xmm4 4000 andl OPENSSL_ia32cap_P+4(%rip),%r10d 4001 leaq 16(%rdx),%rax 4002 cmpl $256,%esi 4003 je .L14rounds 4004 cmpl $192,%esi 4005 je .L12rounds 4006 cmpl $128,%esi 4007 jne .Lbad_keybits 4008 4009.L10rounds: 4010 movl $9,%esi 4011 cmpl $268435456,%r10d 4012 je .L10rounds_alt 4013 4014 movups %xmm0,(%rdx) 4015.byte 102,15,58,223,200,1 4016 call .Lkey_expansion_128_cold 4017.byte 102,15,58,223,200,2 4018 call .Lkey_expansion_128 4019.byte 102,15,58,223,200,4 4020 call .Lkey_expansion_128 4021.byte 102,15,58,223,200,8 4022 call .Lkey_expansion_128 4023.byte 102,15,58,223,200,16 4024 call .Lkey_expansion_128 4025.byte 102,15,58,223,200,32 4026 call .Lkey_expansion_128 4027.byte 102,15,58,223,200,64 4028 call .Lkey_expansion_128 4029.byte 102,15,58,223,200,128 4030 call .Lkey_expansion_128 4031.byte 102,15,58,223,200,27 4032 call .Lkey_expansion_128 4033.byte 102,15,58,223,200,54 4034 call .Lkey_expansion_128 4035 movups %xmm0,(%rax) 4036 movl %esi,80(%rax) 4037 xorl %eax,%eax 4038 jmp .Lenc_key_ret 4039 4040.align 16 4041.L10rounds_alt: 4042 movdqa .Lkey_rotate(%rip),%xmm5 4043 movl $8,%r10d 4044 movdqa .Lkey_rcon1(%rip),%xmm4 4045 movdqa %xmm0,%xmm2 4046 movdqu %xmm0,(%rdx) 4047 jmp .Loop_key128 4048 4049.align 16 4050.Loop_key128: 4051.byte 102,15,56,0,197 4052.byte 102,15,56,221,196 4053 pslld $1,%xmm4 4054 leaq 16(%rax),%rax 4055 4056 movdqa %xmm2,%xmm3 4057 pslldq $4,%xmm2 4058 pxor %xmm2,%xmm3 4059 pslldq $4,%xmm2 4060 pxor %xmm2,%xmm3 4061 pslldq $4,%xmm2 4062 pxor %xmm3,%xmm2 4063 4064 pxor %xmm2,%xmm0 4065 movdqu %xmm0,-16(%rax) 
4066 movdqa %xmm0,%xmm2 4067 4068 decl %r10d 4069 jnz .Loop_key128 4070 4071 movdqa .Lkey_rcon1b(%rip),%xmm4 4072 4073.byte 102,15,56,0,197 4074.byte 102,15,56,221,196 4075 pslld $1,%xmm4 4076 4077 movdqa %xmm2,%xmm3 4078 pslldq $4,%xmm2 4079 pxor %xmm2,%xmm3 4080 pslldq $4,%xmm2 4081 pxor %xmm2,%xmm3 4082 pslldq $4,%xmm2 4083 pxor %xmm3,%xmm2 4084 4085 pxor %xmm2,%xmm0 4086 movdqu %xmm0,(%rax) 4087 4088 movdqa %xmm0,%xmm2 4089.byte 102,15,56,0,197 4090.byte 102,15,56,221,196 4091 4092 movdqa %xmm2,%xmm3 4093 pslldq $4,%xmm2 4094 pxor %xmm2,%xmm3 4095 pslldq $4,%xmm2 4096 pxor %xmm2,%xmm3 4097 pslldq $4,%xmm2 4098 pxor %xmm3,%xmm2 4099 4100 pxor %xmm2,%xmm0 4101 movdqu %xmm0,16(%rax) 4102 4103 movl %esi,96(%rax) 4104 xorl %eax,%eax 4105 jmp .Lenc_key_ret 4106 4107.align 16 4108.L12rounds: 4109 movq 16(%rdi),%xmm2 4110 movl $11,%esi 4111 cmpl $268435456,%r10d 4112 je .L12rounds_alt 4113 4114 movups %xmm0,(%rdx) 4115.byte 102,15,58,223,202,1 4116 call .Lkey_expansion_192a_cold 4117.byte 102,15,58,223,202,2 4118 call .Lkey_expansion_192b 4119.byte 102,15,58,223,202,4 4120 call .Lkey_expansion_192a 4121.byte 102,15,58,223,202,8 4122 call .Lkey_expansion_192b 4123.byte 102,15,58,223,202,16 4124 call .Lkey_expansion_192a 4125.byte 102,15,58,223,202,32 4126 call .Lkey_expansion_192b 4127.byte 102,15,58,223,202,64 4128 call .Lkey_expansion_192a 4129.byte 102,15,58,223,202,128 4130 call .Lkey_expansion_192b 4131 movups %xmm0,(%rax) 4132 movl %esi,48(%rax) 4133 xorq %rax,%rax 4134 jmp .Lenc_key_ret 4135 4136.align 16 4137.L12rounds_alt: 4138 movdqa .Lkey_rotate192(%rip),%xmm5 4139 movdqa .Lkey_rcon1(%rip),%xmm4 4140 movl $8,%r10d 4141 movdqu %xmm0,(%rdx) 4142 jmp .Loop_key192 4143 4144.align 16 4145.Loop_key192: 4146 movq %xmm2,0(%rax) 4147 movdqa %xmm2,%xmm1 4148.byte 102,15,56,0,213 4149.byte 102,15,56,221,212 4150 pslld $1,%xmm4 4151 leaq 24(%rax),%rax 4152 4153 movdqa %xmm0,%xmm3 4154 pslldq $4,%xmm0 4155 pxor %xmm0,%xmm3 4156 pslldq $4,%xmm0 4157 pxor %xmm0,%xmm3 4158 
pslldq $4,%xmm0 4159 pxor %xmm3,%xmm0 4160 4161 pshufd $0xff,%xmm0,%xmm3 4162 pxor %xmm1,%xmm3 4163 pslldq $4,%xmm1 4164 pxor %xmm1,%xmm3 4165 4166 pxor %xmm2,%xmm0 4167 pxor %xmm3,%xmm2 4168 movdqu %xmm0,-16(%rax) 4169 4170 decl %r10d 4171 jnz .Loop_key192 4172 4173 movl %esi,32(%rax) 4174 xorl %eax,%eax 4175 jmp .Lenc_key_ret 4176 4177.align 16 4178.L14rounds: 4179 movups 16(%rdi),%xmm2 4180 movl $13,%esi 4181 leaq 16(%rax),%rax 4182 cmpl $268435456,%r10d 4183 je .L14rounds_alt 4184 4185 movups %xmm0,(%rdx) 4186 movups %xmm2,16(%rdx) 4187.byte 102,15,58,223,202,1 4188 call .Lkey_expansion_256a_cold 4189.byte 102,15,58,223,200,1 4190 call .Lkey_expansion_256b 4191.byte 102,15,58,223,202,2 4192 call .Lkey_expansion_256a 4193.byte 102,15,58,223,200,2 4194 call .Lkey_expansion_256b 4195.byte 102,15,58,223,202,4 4196 call .Lkey_expansion_256a 4197.byte 102,15,58,223,200,4 4198 call .Lkey_expansion_256b 4199.byte 102,15,58,223,202,8 4200 call .Lkey_expansion_256a 4201.byte 102,15,58,223,200,8 4202 call .Lkey_expansion_256b 4203.byte 102,15,58,223,202,16 4204 call .Lkey_expansion_256a 4205.byte 102,15,58,223,200,16 4206 call .Lkey_expansion_256b 4207.byte 102,15,58,223,202,32 4208 call .Lkey_expansion_256a 4209.byte 102,15,58,223,200,32 4210 call .Lkey_expansion_256b 4211.byte 102,15,58,223,202,64 4212 call .Lkey_expansion_256a 4213 movups %xmm0,(%rax) 4214 movl %esi,16(%rax) 4215 xorq %rax,%rax 4216 jmp .Lenc_key_ret 4217 4218.align 16 4219.L14rounds_alt: 4220 movdqa .Lkey_rotate(%rip),%xmm5 4221 movdqa .Lkey_rcon1(%rip),%xmm4 4222 movl $7,%r10d 4223 movdqu %xmm0,0(%rdx) 4224 movdqa %xmm2,%xmm1 4225 movdqu %xmm2,16(%rdx) 4226 jmp .Loop_key256 4227 4228.align 16 4229.Loop_key256: 4230.byte 102,15,56,0,213 4231.byte 102,15,56,221,212 4232 4233 movdqa %xmm0,%xmm3 4234 pslldq $4,%xmm0 4235 pxor %xmm0,%xmm3 4236 pslldq $4,%xmm0 4237 pxor %xmm0,%xmm3 4238 pslldq $4,%xmm0 4239 pxor %xmm3,%xmm0 4240 pslld $1,%xmm4 4241 4242 pxor %xmm2,%xmm0 4243 movdqu %xmm0,(%rax) 4244 4245 
decl %r10d 4246 jz .Ldone_key256 4247 4248 pshufd $0xff,%xmm0,%xmm2 4249 pxor %xmm3,%xmm3 4250.byte 102,15,56,221,211 4251 4252 movdqa %xmm1,%xmm3 4253 pslldq $4,%xmm1 4254 pxor %xmm1,%xmm3 4255 pslldq $4,%xmm1 4256 pxor %xmm1,%xmm3 4257 pslldq $4,%xmm1 4258 pxor %xmm3,%xmm1 4259 4260 pxor %xmm1,%xmm2 4261 movdqu %xmm2,16(%rax) 4262 leaq 32(%rax),%rax 4263 movdqa %xmm2,%xmm1 4264 4265 jmp .Loop_key256 4266 4267.Ldone_key256: 4268 movl %esi,16(%rax) 4269 xorl %eax,%eax 4270 jmp .Lenc_key_ret 4271 4272.align 16 4273.Lbad_keybits: 4274 movq $-2,%rax 4275.Lenc_key_ret: 4276 pxor %xmm0,%xmm0 4277 pxor %xmm1,%xmm1 4278 pxor %xmm2,%xmm2 4279 pxor %xmm3,%xmm3 4280 pxor %xmm4,%xmm4 4281 pxor %xmm5,%xmm5 4282 addq $8,%rsp 4283 .byte 0xf3,0xc3 4284.LSEH_end_set_encrypt_key: 4285 4286.align 16 4287.Lkey_expansion_128: 4288 movups %xmm0,(%rax) 4289 leaq 16(%rax),%rax 4290.Lkey_expansion_128_cold: 4291 shufps $16,%xmm0,%xmm4 4292 xorps %xmm4,%xmm0 4293 shufps $140,%xmm0,%xmm4 4294 xorps %xmm4,%xmm0 4295 shufps $255,%xmm1,%xmm1 4296 xorps %xmm1,%xmm0 4297 .byte 0xf3,0xc3 4298 4299.align 16 4300.Lkey_expansion_192a: 4301 movups %xmm0,(%rax) 4302 leaq 16(%rax),%rax 4303.Lkey_expansion_192a_cold: 4304 movaps %xmm2,%xmm5 4305.Lkey_expansion_192b_warm: 4306 shufps $16,%xmm0,%xmm4 4307 movdqa %xmm2,%xmm3 4308 xorps %xmm4,%xmm0 4309 shufps $140,%xmm0,%xmm4 4310 pslldq $4,%xmm3 4311 xorps %xmm4,%xmm0 4312 pshufd $85,%xmm1,%xmm1 4313 pxor %xmm3,%xmm2 4314 pxor %xmm1,%xmm0 4315 pshufd $255,%xmm0,%xmm3 4316 pxor %xmm3,%xmm2 4317 .byte 0xf3,0xc3 4318 4319.align 16 4320.Lkey_expansion_192b: 4321 movaps %xmm0,%xmm3 4322 shufps $68,%xmm0,%xmm5 4323 movups %xmm5,(%rax) 4324 shufps $78,%xmm2,%xmm3 4325 movups %xmm3,16(%rax) 4326 leaq 32(%rax),%rax 4327 jmp .Lkey_expansion_192b_warm 4328 4329.align 16 4330.Lkey_expansion_256a: 4331 movups %xmm2,(%rax) 4332 leaq 16(%rax),%rax 4333.Lkey_expansion_256a_cold: 4334 shufps $16,%xmm0,%xmm4 4335 xorps %xmm4,%xmm0 4336 shufps $140,%xmm0,%xmm4 4337 xorps 
%xmm4,%xmm0 4338 shufps $255,%xmm1,%xmm1 4339 xorps %xmm1,%xmm0 4340 .byte 0xf3,0xc3 4341 4342.align 16 4343.Lkey_expansion_256b: 4344 movups %xmm0,(%rax) 4345 leaq 16(%rax),%rax 4346 4347 shufps $16,%xmm2,%xmm4 4348 xorps %xmm4,%xmm2 4349 shufps $140,%xmm2,%xmm4 4350 xorps %xmm4,%xmm2 4351 shufps $170,%xmm1,%xmm1 4352 xorps %xmm1,%xmm2 4353 .byte 0xf3,0xc3 4354.size aesni_set_encrypt_key,.-aesni_set_encrypt_key 4355.size __aesni_set_encrypt_key,.-__aesni_set_encrypt_key 4356.align 64 4357.Lbswap_mask: 4358.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0 4359.Lincrement32: 4360.long 6,6,6,0 4361.Lincrement64: 4362.long 1,0,0,0 4363.Lxts_magic: 4364.long 0x87,0,1,0 4365.Lincrement1: 4366.byte 0,0,0,0,0,0,0,0,0,0,0,0,0,0,0,1 4367.Lkey_rotate: 4368.long 0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d,0x0c0f0e0d 4369.Lkey_rotate192: 4370.long 0x04070605,0x04070605,0x04070605,0x04070605 4371.Lkey_rcon1: 4372.long 1,1,1,1 4373.Lkey_rcon1b: 4374.long 0x1b,0x1b,0x1b,0x1b 4375 4376.byte 65,69,83,32,102,111,114,32,73,110,116,101,108,32,65,69,83,45,78,73,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0 4377.align 64 4378#endif 4379