#if defined(__x86_64__)
.text
.extern OPENSSL_ia32cap_P
.hidden OPENSSL_ia32cap_P

chacha20_poly1305_constants:

.align 64
.chacha20_consts:
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.byte 'e','x','p','a','n','d',' ','3','2','-','b','y','t','e',' ','k'
.rol8:
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.byte 3,0,1,2, 7,4,5,6, 11,8,9,10, 15,12,13,14
.rol16:
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.byte 2,3,0,1, 6,7,4,5, 10,11,8,9, 14,15,12,13
.avx2_init:
.long 0,0,0,0
.sse_inc:
.long 1,0,0,0
.avx2_inc:
.long 2,0,0,0,2,0,0,0
.clamp:
.quad 0x0FFFFFFC0FFFFFFF, 0x0FFFFFFC0FFFFFFC
.quad 0xFFFFFFFFFFFFFFFF, 0xFFFFFFFFFFFFFFFF
.align 16
# and_masks: rows with 1..15 low bytes set, for masking partial blocks.
.and_masks:
.byte 0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00,0x00
.byte 0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0xff,0x00

# poly_hash_ad_internal: absorbs the additional data at %rcx (length %r8)
# into the Poly1305 accumulator in r10:r11:r12, using the clamped key r at
# 0(%rbp). The 13-byte case below is a fast path for TLS-sized AAD.
.type poly_hash_ad_internal,@function
.align 64
poly_hash_ad_internal:
.cfi_startproc
	xorq %r10,%r10
	xorq %r11,%r11
	xorq %r12,%r12
	cmpq $13,%r8
	jne hash_ad_loop
poly_fast_tls_ad:

	movq (%rcx),%r10
	movq 5(%rcx),%r11
	shrq $24,%r11
	movq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	.byte 0xf3,0xc3	# rep ret
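# The block above (between poly_fast_tls_ad and the ret) is one Poly1305
# update. The same mulq/shrdq sequence recurs throughout this file, so a
# rough C-style sketch of what it computes may help (illustrative only,
# variable names are ours; r is the clamped key at 0(%rbp), h is r10:r11:r12):
#
#	h += block;			/* addq/adcq, plus adcq $1 pad bit */
#	t = h * r;			/* schoolbook 64x64 partial products */
#	c = t >> 130;			/* andq $3 keeps h below 2^130 */
#	h = (t mod 2^130) + 4*c + c;	/* 2^130 == 5 (mod 2^130 - 5), folded
#					   back via andq $-4 / shrdq $2 / shrq $2 */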
hash_ad_loop:

	cmpq $16,%r8
	jb hash_ad_tail
	addq 0(%rcx),%r10
	adcq 8+0(%rcx),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rcx),%rcx
	subq $16,%r8
	jmp hash_ad_loop
hash_ad_tail:
	cmpq $0,%r8
	je 1f

	xorq %r13,%r13
	xorq %r14,%r14
	xorq %r15,%r15
	addq %r8,%rcx
hash_ad_tail_loop:
	shldq $8,%r13,%r14
	shlq $8,%r13
	movzbq -1(%rcx),%r15
	xorq %r15,%r13
	decq %rcx
	decq %r8
	jne hash_ad_tail_loop

	addq %r13,%r10
	adcq %r14,%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

1:
	.byte 0xf3,0xc3	# rep ret
.cfi_endproc
.size poly_hash_ad_internal, .-poly_hash_ad_internal

.globl chacha20_poly1305_open
.hidden chacha20_poly1305_open
.type chacha20_poly1305_open,@function
.align 64
chacha20_poly1305_open:
.cfi_startproc
	pushq %rbp
.cfi_adjust_cfa_offset 8
	pushq %rbx
.cfi_adjust_cfa_offset 8
	pushq %r12
.cfi_adjust_cfa_offset 8
	pushq %r13
.cfi_adjust_cfa_offset 8
	pushq %r14
.cfi_adjust_cfa_offset 8
	pushq %r15
.cfi_adjust_cfa_offset 8

	pushq %r9
.cfi_adjust_cfa_offset 8
	subq $288 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32
.cfi_offset rbp, -16
.cfi_offset rbx, -24
.cfi_offset r12, -32
.cfi_offset r13, -40
.cfi_offset r14, -48
.cfi_offset r15, -56
	leaq 32(%rsp),%rbp
	andq $-32,%rbp
	movq %rdx,8+32(%rbp)
	movq %r8,0+32(%rbp)
	movq %rdx,%rbx

	movl OPENSSL_ia32cap_P+8(%rip),%eax
	andl $288,%eax
	xorl $288,%eax
	jz chacha20_poly1305_open_avx2

1:
	cmpq $128,%rbx
	jbe open_sse_128

	movdqa .chacha20_consts(%rip),%xmm0
	movdqu 0(%r9),%xmm4
	movdqu 16(%r9),%xmm8
	movdqu 32(%r9),%xmm12
	movdqa %xmm12,%xmm7

	movdqa %xmm4,48(%rbp)
	movdqa %xmm8,64(%rbp)
	movdqa %xmm12,96(%rbp)
	movq $10,%r10
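# The loop below runs 10 double rounds of ChaCha20 on a single block held in
# xmm0/xmm4/xmm8/xmm12; this first block only derives the Poly1305 key (its
# first 32 bytes are clamped and stored at 0(%rbp)). For reference, one
# scalar quarter round in C (illustrative, not part of the generated file):
#
#	uint32_t rotl32(uint32_t v, int n) { return (v << n) | (v >> (32 - n)); }
#	a += b; d ^= a; d = rotl32(d, 16);
#	c += d; b ^= c; b = rotl32(b, 12);
#	a += b; d ^= a; d = rotl32(d, 8);
#	c += d; b ^= c; b = rotl32(b, 7);
#
# The 16- and 8-bit rotations use pshufb with .rol16/.rol8; the 12- and
# 7-bit ones use a pslld/psrld/pxor triple.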
1:
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

	decq %r10
	jne 1b

	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4

	pand .clamp(%rip),%xmm0
	movdqa %xmm0,0(%rbp)
	movdqa %xmm4,16(%rbp)

	movq %r8,%r8
	call poly_hash_ad_internal
open_sse_main_loop:
	cmpq $256,%rbx
	jb 2f

	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm5
	movdqa %xmm8,%xmm9
	movdqa %xmm0,%xmm2
	movdqa %xmm4,%xmm6
	movdqa %xmm8,%xmm10
	movdqa %xmm0,%xmm3
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm11
	movdqa 96(%rbp),%xmm15
	paddd .sse_inc(%rip),%xmm15
	movdqa %xmm15,%xmm14
	paddd .sse_inc(%rip),%xmm14
	movdqa %xmm14,%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)
	movdqa %xmm14,128(%rbp)
	movdqa %xmm15,144(%rbp)

	movq $4,%rcx
	movq %rsi,%r8
1:
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	addq 0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12

	leaq 16(%r8),%r8
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm4
	pxor %xmm8,%xmm4
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movdqa .rol8(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm4
	pxor %xmm8,%xmm4
	movdqa 80(%rbp),%xmm8
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
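# The .byte runs here (and elsewhere in this file) are hand-encoded SSSE3
# instructions, presumably kept as raw bytes so the file also assembles with
# pre-SSSE3 toolchains:
#	102,69,15,56,0,NN        = pshufb %xmmM,%xmmN   (66 45 0F 38 00 /r)
#	102,(69|15),15,58,15,NN,I = palignr $I,%xmmM,%xmmN (66 0F 3A 0F /r ib)
# e.g. .byte 102,15,58,15,228,4 is palignr $4,%xmm4,%xmm4, the row rotation
# between the column and diagonal half-rounds.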
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm4
	pxor %xmm8,%xmm4
	movdqa .rol8(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm4
	pxor %xmm8,%xmm4
	movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

	decq %rcx
	jge 1b
	addq 0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%r8),%r8
	cmpq $-6,%rcx
	jg 1b
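# Our reading of open_sse_main_loop: each pass handles 256 bytes with four
# ChaCha20 states advanced in parallel (xmm0-3 / xmm4-7 / xmm8-11 /
# xmm12-15, counters staggered via .sse_inc), while the scalar pipe hashes
# the same 256 bytes of ciphertext with Poly1305 (%r8 walking %rsi),
# interleaved with the 10 double rounds; %rcx counting from 4 down past -6
# schedules the extra hash blocks. The stores below write the 256 bytes out.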
	paddd .chacha20_consts(%rip),%xmm3
	paddd 48(%rbp),%xmm7
	paddd 64(%rbp),%xmm11
	paddd 144(%rbp),%xmm15
	paddd .chacha20_consts(%rip),%xmm2
	paddd 48(%rbp),%xmm6
	paddd 64(%rbp),%xmm10
	paddd 128(%rbp),%xmm14
	paddd .chacha20_consts(%rip),%xmm1
	paddd 48(%rbp),%xmm5
	paddd 64(%rbp),%xmm9
	paddd 112(%rbp),%xmm13
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12
	movdqa %xmm12,80(%rbp)
	movdqu 0 + 0(%rsi),%xmm12
	pxor %xmm3,%xmm12
	movdqu %xmm12,0 + 0(%rdi)
	movdqu 16 + 0(%rsi),%xmm12
	pxor %xmm7,%xmm12
	movdqu %xmm12,16 + 0(%rdi)
	movdqu 32 + 0(%rsi),%xmm12
	pxor %xmm11,%xmm12
	movdqu %xmm12,32 + 0(%rdi)
	movdqu 48 + 0(%rsi),%xmm12
	pxor %xmm15,%xmm12
	movdqu %xmm12,48 + 0(%rdi)
	movdqu 0 + 64(%rsi),%xmm3
	movdqu 16 + 64(%rsi),%xmm7
	movdqu 32 + 64(%rsi),%xmm11
	movdqu 48 + 64(%rsi),%xmm15
	pxor %xmm3,%xmm2
	pxor %xmm7,%xmm6
	pxor %xmm11,%xmm10
	pxor %xmm14,%xmm15
	movdqu %xmm2,0 + 64(%rdi)
	movdqu %xmm6,16 + 64(%rdi)
	movdqu %xmm10,32 + 64(%rdi)
	movdqu %xmm15,48 + 64(%rdi)
	movdqu 0 + 128(%rsi),%xmm3
	movdqu 16 + 128(%rsi),%xmm7
	movdqu 32 + 128(%rsi),%xmm11
	movdqu 48 + 128(%rsi),%xmm15
	pxor %xmm3,%xmm1
	pxor %xmm7,%xmm5
	pxor %xmm11,%xmm9
	pxor %xmm13,%xmm15
	movdqu %xmm1,0 + 128(%rdi)
	movdqu %xmm5,16 + 128(%rdi)
	movdqu %xmm9,32 + 128(%rdi)
	movdqu %xmm15,48 + 128(%rdi)
	movdqu 0 + 192(%rsi),%xmm3
	movdqu 16 + 192(%rsi),%xmm7
	movdqu 32 + 192(%rsi),%xmm11
	movdqu 48 + 192(%rsi),%xmm15
	pxor %xmm3,%xmm0
	pxor %xmm7,%xmm4
	pxor %xmm11,%xmm8
	pxor 80(%rbp),%xmm15
	movdqu %xmm0,0 + 192(%rdi)
	movdqu %xmm4,16 + 192(%rdi)
	movdqu %xmm8,32 + 192(%rdi)
	movdqu %xmm15,48 + 192(%rdi)

	leaq 256(%rsi),%rsi
	leaq 256(%rdi),%rdi
	subq $256,%rbx
	jmp open_sse_main_loop
2:

	testq %rbx,%rbx
	jz open_sse_finalize
	cmpq $64,%rbx
	ja 3f
	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa 96(%rbp),%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)

	xorq %r8,%r8
	movq %rbx,%rcx
	cmpq $16,%rcx
	jb 2f
1:
	addq 0(%rsi,%r8), %r10
	adcq 8+0(%rsi,%r8), %r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	subq $16,%rcx
2:
	addq $16,%r8
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

	cmpq $16,%rcx
	jae 1b
	cmpq $160,%r8
	jne 2b
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12

	jmp open_sse_tail_64_dec_loop
3:
	cmpq $128,%rbx
	ja 3f
	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm5
	movdqa %xmm8,%xmm9
	movdqa 96(%rbp),%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)

	movq %rbx,%rcx
	andq $-16,%rcx
	xorq %r8,%r8
1:
	addq 0(%rsi,%r8), %r10
	adcq 8+0(%rsi,%r8), %r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

2:
	addq $16,%r8
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4

	cmpq %rcx,%r8
	jb 1b
	cmpq $160,%r8
	jne 2b
	paddd .chacha20_consts(%rip),%xmm1
	paddd 48(%rbp),%xmm5
	paddd 64(%rbp),%xmm9
	paddd 112(%rbp),%xmm13
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12
	movdqu 0 + 0(%rsi),%xmm3
	movdqu 16 + 0(%rsi),%xmm7
	movdqu 32 + 0(%rsi),%xmm11
	movdqu 48 + 0(%rsi),%xmm15
	pxor %xmm3,%xmm1
	pxor %xmm7,%xmm5
	pxor %xmm11,%xmm9
	pxor %xmm13,%xmm15
	movdqu %xmm1,0 + 0(%rdi)
	movdqu %xmm5,16 + 0(%rdi)
	movdqu %xmm9,32 + 0(%rdi)
	movdqu %xmm15,48 + 0(%rdi)

	subq $64,%rbx
	leaq 64(%rsi),%rsi
	leaq 64(%rdi),%rdi
	jmp open_sse_tail_64_dec_loop
3:
	cmpq $192,%rbx
	ja 3f
	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm5
	movdqa %xmm8,%xmm9
	movdqa %xmm0,%xmm2
	movdqa %xmm4,%xmm6
	movdqa %xmm8,%xmm10
	movdqa 96(%rbp),%xmm14
	paddd .sse_inc(%rip),%xmm14
	movdqa %xmm14,%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)
	movdqa %xmm14,128(%rbp)

	movq %rbx,%rcx
	movq $160,%r8
	cmpq $160,%rcx
	cmovgq %r8,%rcx
	andq $-16,%rcx
	xorq %r8,%r8
1:
	addq 0(%rsi,%r8), %r10
	adcq 8+0(%rsi,%r8), %r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

2:
	addq $16,%r8
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm6
	pxor %xmm3,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm6
	pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm6
	pxor %xmm3,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm6
	pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4

	cmpq %rcx,%r8
	jb 1b
	cmpq $160,%r8
	jne 2b
	cmpq $176,%rbx
	jb 1f
	addq 160(%rsi),%r10
	adcq 8+160(%rsi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	cmpq $192,%rbx
	jb 1f
	addq 176(%rsi),%r10
	adcq 8+176(%rsi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

1:
	paddd .chacha20_consts(%rip),%xmm2
	paddd 48(%rbp),%xmm6
	paddd 64(%rbp),%xmm10
	paddd 128(%rbp),%xmm14
	paddd .chacha20_consts(%rip),%xmm1
	paddd 48(%rbp),%xmm5
	paddd 64(%rbp),%xmm9
	paddd 112(%rbp),%xmm13
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12
	movdqu 0 + 0(%rsi),%xmm3
	movdqu 16 + 0(%rsi),%xmm7
	movdqu 32 + 0(%rsi),%xmm11
	movdqu 48 + 0(%rsi),%xmm15
	pxor %xmm3,%xmm2
	pxor %xmm7,%xmm6
	pxor %xmm11,%xmm10
	pxor %xmm14,%xmm15
	movdqu %xmm2,0 + 0(%rdi)
	movdqu %xmm6,16 + 0(%rdi)
	movdqu %xmm10,32 + 0(%rdi)
	movdqu %xmm15,48 + 0(%rdi)
	movdqu 0 + 64(%rsi),%xmm3
	movdqu 16 + 64(%rsi),%xmm7
	movdqu 32 + 64(%rsi),%xmm11
	movdqu 48 + 64(%rsi),%xmm15
	pxor %xmm3,%xmm1
	pxor %xmm7,%xmm5
	pxor %xmm11,%xmm9
	pxor %xmm13,%xmm15
	movdqu %xmm1,0 + 64(%rdi)
	movdqu %xmm5,16 + 64(%rdi)
	movdqu %xmm9,32 + 64(%rdi)
	movdqu %xmm15,48 + 64(%rdi)

	subq $128,%rbx
	leaq 128(%rsi),%rsi
	leaq 128(%rdi),%rdi
	jmp open_sse_tail_64_dec_loop
3:

	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm5
	movdqa %xmm8,%xmm9
	movdqa %xmm0,%xmm2
	movdqa %xmm4,%xmm6
	movdqa %xmm8,%xmm10
	movdqa %xmm0,%xmm3
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm11
	movdqa 96(%rbp),%xmm15
	paddd .sse_inc(%rip),%xmm15
	movdqa %xmm15,%xmm14
	paddd .sse_inc(%rip),%xmm14
	movdqa %xmm14,%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)
	movdqa %xmm14,128(%rbp)
	movdqa %xmm15,144(%rbp)

	xorq %r8,%r8
1:
	addq 0(%rsi,%r8), %r10
	adcq 8+0(%rsi,%r8), %r11
	adcq $1,%r12
	movdqa %xmm11,80(%rbp)
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm4
	pxor %xmm11,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm4
	pxor %xmm11,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm5
	pxor %xmm11,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm5
	pxor %xmm11,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm6
	pxor %xmm11,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm6
	pxor %xmm11,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
	movdqa 80(%rbp),%xmm11
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movdqa %xmm9,80(%rbp)
	paddd %xmm7,%xmm3
	pxor %xmm3,%xmm15
	pshufb .rol16(%rip),%xmm15
	paddd %xmm15,%xmm11
	pxor %xmm11,%xmm7
	movdqa %xmm7,%xmm9
	pslld $12,%xmm9
	psrld $20,%xmm7
	pxor %xmm9,%xmm7
	paddd %xmm7,%xmm3
	pxor %xmm3,%xmm15
	pshufb .rol8(%rip),%xmm15
	paddd %xmm15,%xmm11
	pxor %xmm11,%xmm7
	movdqa %xmm7,%xmm9
	pslld $7,%xmm9
	psrld $25,%xmm7
	pxor %xmm9,%xmm7
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
	movdqa 80(%rbp),%xmm9
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	movdqa %xmm11,80(%rbp)
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm4
	pxor %xmm11,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm4
	pxor %xmm11,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm5
	pxor %xmm11,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm5
	pxor %xmm11,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm11
	pslld $12,%xmm11
	psrld $20,%xmm6
	pxor %xmm11,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm11
	pslld $7,%xmm11
	psrld $25,%xmm6
	pxor %xmm11,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
	movdqa 80(%rbp),%xmm11
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	movdqa %xmm9,80(%rbp)
	paddd %xmm7,%xmm3
	pxor %xmm3,%xmm15
	pshufb .rol16(%rip),%xmm15
	paddd %xmm15,%xmm11
	pxor %xmm11,%xmm7
	movdqa %xmm7,%xmm9
	pslld $12,%xmm9
	psrld $20,%xmm7
	pxor %xmm9,%xmm7
	paddd %xmm7,%xmm3
	pxor %xmm3,%xmm15
	pshufb .rol8(%rip),%xmm15
	paddd %xmm15,%xmm11
	pxor %xmm11,%xmm7
	movdqa %xmm7,%xmm9
	pslld $7,%xmm9
	psrld $25,%xmm7
	pxor %xmm9,%xmm7
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
	movdqa 80(%rbp),%xmm9

	addq $16,%r8
	cmpq $160,%r8
	jb 1b
	movq %rbx,%rcx
	andq $-16,%rcx
1:
	addq 0(%rsi,%r8), %r10
	adcq 8+0(%rsi,%r8), %r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	addq $16,%r8
	cmpq %rcx,%r8
	jb 1b
	paddd .chacha20_consts(%rip),%xmm3
	paddd 48(%rbp),%xmm7
	paddd 64(%rbp),%xmm11
	paddd 144(%rbp),%xmm15
	paddd .chacha20_consts(%rip),%xmm2
	paddd 48(%rbp),%xmm6
	paddd 64(%rbp),%xmm10
	paddd 128(%rbp),%xmm14
	paddd .chacha20_consts(%rip),%xmm1
	paddd 48(%rbp),%xmm5
	paddd 64(%rbp),%xmm9
	paddd 112(%rbp),%xmm13
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12
	movdqa %xmm12,80(%rbp)
	movdqu 0 + 0(%rsi),%xmm12
	pxor %xmm3,%xmm12
	movdqu %xmm12,0 + 0(%rdi)
	movdqu 16 + 0(%rsi),%xmm12
	pxor %xmm7,%xmm12
	movdqu %xmm12,16 + 0(%rdi)
	movdqu 32 + 0(%rsi),%xmm12
	pxor %xmm11,%xmm12
	movdqu %xmm12,32 + 0(%rdi)
	movdqu 48 + 0(%rsi),%xmm12
	pxor %xmm15,%xmm12
	movdqu %xmm12,48 + 0(%rdi)
	movdqu 0 + 64(%rsi),%xmm3
	movdqu 16 + 64(%rsi),%xmm7
	movdqu 32 + 64(%rsi),%xmm11
	movdqu 48 + 64(%rsi),%xmm15
	pxor %xmm3,%xmm2
	pxor %xmm7,%xmm6
	pxor %xmm11,%xmm10
	pxor %xmm14,%xmm15
	movdqu %xmm2,0 + 64(%rdi)
	movdqu %xmm6,16 + 64(%rdi)
	movdqu %xmm10,32 + 64(%rdi)
	movdqu %xmm15,48 + 64(%rdi)
	movdqu 0 + 128(%rsi),%xmm3
	movdqu 16 + 128(%rsi),%xmm7
	movdqu 32 + 128(%rsi),%xmm11
	movdqu 48 + 128(%rsi),%xmm15
	pxor %xmm3,%xmm1
	pxor %xmm7,%xmm5
	pxor %xmm11,%xmm9
	pxor %xmm13,%xmm15
	movdqu %xmm1,0 + 128(%rdi)
	movdqu %xmm5,16 + 128(%rdi)
	movdqu %xmm9,32 + 128(%rdi)
	movdqu %xmm15,48 + 128(%rdi)

	movdqa 80(%rbp),%xmm12
	subq $192,%rbx
	leaq 192(%rsi),%rsi
	leaq 192(%rdi),%rdi

open_sse_tail_64_dec_loop:
	cmpq $16,%rbx
	jb 1f
	subq $16,%rbx
	movdqu (%rsi),%xmm3
	pxor %xmm3,%xmm0
	movdqu %xmm0,(%rdi)
	leaq 16(%rsi),%rsi
	leaq 16(%rdi),%rdi
	movdqa %xmm4,%xmm0
	movdqa %xmm8,%xmm4
	movdqa %xmm12,%xmm8
	jmp open_sse_tail_64_dec_loop
1:
	movdqa %xmm0,%xmm1

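# open_sse_tail_16 (below) handles the final sub-16-byte fragment: the bytes
# are gathered back-to-front into xmm3 one at a time, hashed as ciphertext
# via r13:r14, then decrypted against the last keystream block in xmm1 and
# written out forward. The gather, as a C sketch (illustrative only):
#
#	unsigned __int128 b = 0;
#	for (size_t i = 0; i < n; i++)		/* n = remaining length < 16 */
#		b = (b << 8) | in[n - 1 - i];	/* in[] is the ciphertext tail */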
open_sse_tail_16:
	testq %rbx,%rbx
	jz open_sse_finalize

	pxor %xmm3,%xmm3
	leaq -1(%rsi,%rbx), %rsi
	movq %rbx,%r8
2:
	pslldq $1,%xmm3
	pinsrb $0,(%rsi),%xmm3
	subq $1,%rsi
	subq $1,%r8
	jnz 2b

3:
.byte 102,73,15,126,221	# movq %xmm3,%r13 (66 REX.W 0F 7E /r), raw-encoded
	pextrq $1,%xmm3,%r14

	pxor %xmm1,%xmm3

2:
	pextrb $0,%xmm3,(%rdi)
	psrldq $1,%xmm3
	addq $1,%rdi
	subq $1,%rbx
	jne 2b

	addq %r13,%r10
	adcq %r14,%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

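# open_sse_finalize (below) adds the length block stashed at 32(%rbp)
# (ad_len and ciphertext_len from the prologue), reduces the accumulator
# fully mod p = 2^130 - 5, adds the key half s from 16(%rbp), and stores the
# 16-byte tag through the pointer that was pushed from %r9. The
# subq $-5 / sbbq $-1 / sbbq $3 triple subtracts p's three limbs
# (0xFFFFFFFFFFFFFFFB, 0xFFFFFFFFFFFFFFFF, 3) via sign-extended immediates,
# and cmovc keeps the original h when the subtraction borrows, i.e. h < p.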
open_sse_finalize:
	addq 32(%rbp),%r10
	adcq 8+32(%rbp),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	movq %r10,%r13
	movq %r11,%r14
	movq %r12,%r15
	subq $-5,%r10
	sbbq $-1,%r11
	sbbq $3,%r12
	cmovcq %r13,%r10
	cmovcq %r14,%r11
	cmovcq %r15,%r12

	addq 0+16(%rbp),%r10
	adcq 8+16(%rbp),%r11

	addq $288 + 32,%rsp
.cfi_adjust_cfa_offset -(288 + 32)
	popq %r9
.cfi_adjust_cfa_offset -8
	movq %r10,(%r9)
	movq %r11,8(%r9)

	popq %r15
.cfi_adjust_cfa_offset -8
	popq %r14
.cfi_adjust_cfa_offset -8
	popq %r13
.cfi_adjust_cfa_offset -8
	popq %r12
.cfi_adjust_cfa_offset -8
	popq %rbx
.cfi_adjust_cfa_offset -8
	popq %rbp
.cfi_adjust_cfa_offset -8
	.byte 0xf3,0xc3	# rep ret
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32

open_sse_128:
	movdqu .chacha20_consts(%rip),%xmm0
	movdqa %xmm0,%xmm1
	movdqa %xmm0,%xmm2
	movdqu 0(%r9),%xmm4
	movdqa %xmm4,%xmm5
	movdqa %xmm4,%xmm6
	movdqu 16(%r9),%xmm8
	movdqa %xmm8,%xmm9
	movdqa %xmm8,%xmm10
	movdqu 32(%r9),%xmm12
	movdqa %xmm12,%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm14
	paddd .sse_inc(%rip),%xmm14
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm11
	movdqa %xmm13,%xmm15
	movq $10,%r10
1:
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm6
	pxor %xmm3,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm6
	pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol16(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm4
	pxor %xmm3,%xmm4
	paddd %xmm4,%xmm0
	pxor %xmm0,%xmm12
	pshufb .rol8(%rip),%xmm12
	paddd %xmm12,%xmm8
	pxor %xmm8,%xmm4
	movdqa %xmm4,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm4
	pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol16(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm5
	pxor %xmm3,%xmm5
	paddd %xmm5,%xmm1
	pxor %xmm1,%xmm13
	pshufb .rol8(%rip),%xmm13
	paddd %xmm13,%xmm9
	pxor %xmm9,%xmm5
	movdqa %xmm5,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm5
	pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol16(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $12,%xmm3
	psrld $20,%xmm6
	pxor %xmm3,%xmm6
	paddd %xmm6,%xmm2
	pxor %xmm2,%xmm14
	pshufb .rol8(%rip),%xmm14
	paddd %xmm14,%xmm10
	pxor %xmm10,%xmm6
	movdqa %xmm6,%xmm3
	pslld $7,%xmm3
	psrld $25,%xmm6
	pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4

	decq %r10
	jnz 1b
	paddd .chacha20_consts(%rip),%xmm0
	paddd .chacha20_consts(%rip),%xmm1
	paddd .chacha20_consts(%rip),%xmm2
	paddd %xmm7,%xmm4
	paddd %xmm7,%xmm5
	paddd %xmm7,%xmm6
	paddd %xmm11,%xmm9
	paddd %xmm11,%xmm10
	paddd %xmm15,%xmm13
	paddd .sse_inc(%rip),%xmm15
	paddd %xmm15,%xmm14

	pand .clamp(%rip),%xmm0
	movdqa %xmm0,0(%rbp)
	movdqa %xmm4,16(%rbp)

	movq %r8,%r8
	call poly_hash_ad_internal
1:
	cmpq $16,%rbx
	jb open_sse_tail_16
	subq $16,%rbx
	addq 0(%rsi),%r10
	adcq 8+0(%rsi),%r11
	adcq $1,%r12

	movdqu 0(%rsi),%xmm3
	pxor %xmm3,%xmm1
	movdqu %xmm1,0(%rdi)
	leaq 16(%rsi),%rsi
	leaq 16(%rdi),%rdi
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	movdqa %xmm5,%xmm1
	movdqa %xmm9,%xmm5
	movdqa %xmm13,%xmm9
	movdqa %xmm2,%xmm13
	movdqa %xmm6,%xmm2
	movdqa %xmm10,%xmm6
	movdqa %xmm14,%xmm10
	jmp 1b
	jmp open_sse_tail_16
.size chacha20_poly1305_open, .-chacha20_poly1305_open
.cfi_endproc

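# chacha20_poly1305_seal mirrors the open path above. From the register
# usage (key/nonce block read from 0(%r9), AD hashed from %rcx with length
# %r8, %rdx kept as the plaintext length, tag written through the saved
# %r9), the System V prototype appears to be, as a reconstruction (argument
# names are ours, not from this file):
#
#	void chacha20_poly1305_seal(uint8_t *out,        /* %rdi */
#	                            const uint8_t *in,   /* %rsi */
#	                            size_t in_len,       /* %rdx */
#	                            const uint8_t *ad,   /* %rcx */
#	                            size_t ad_len,       /* %r8  */
#	                            uint8_t *key_iv_tag  /* %r9  */);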
.globl chacha20_poly1305_seal
.hidden chacha20_poly1305_seal
.type chacha20_poly1305_seal,@function
.align 64
chacha20_poly1305_seal:
.cfi_startproc
	pushq %rbp
.cfi_adjust_cfa_offset 8
	pushq %rbx
.cfi_adjust_cfa_offset 8
	pushq %r12
.cfi_adjust_cfa_offset 8
	pushq %r13
.cfi_adjust_cfa_offset 8
	pushq %r14
.cfi_adjust_cfa_offset 8
	pushq %r15
.cfi_adjust_cfa_offset 8

	pushq %r9
.cfi_adjust_cfa_offset 8
	subq $288 + 32,%rsp
.cfi_adjust_cfa_offset 288 + 32
.cfi_offset rbp, -16
.cfi_offset rbx, -24
.cfi_offset r12, -32
.cfi_offset r13, -40
.cfi_offset r14, -48
.cfi_offset r15, -56
	leaq 32(%rsp),%rbp
	andq $-32,%rbp
	movq %rdx,8+32(%rbp)
	movq %r8,0+32(%rbp)
	movq %rdx,%rbx

	movl OPENSSL_ia32cap_P+8(%rip),%eax
	andl $288,%eax
	xorl $288,%eax
	jz chacha20_poly1305_seal_avx2

	cmpq $128,%rbx
	jbe seal_sse_128

	movdqa .chacha20_consts(%rip),%xmm0
	movdqu 0(%r9),%xmm4
	movdqu 16(%r9),%xmm8
	movdqu 32(%r9),%xmm12
	movdqa %xmm0,%xmm1
	movdqa %xmm0,%xmm2
	movdqa %xmm0,%xmm3
	movdqa %xmm4,%xmm5
	movdqa %xmm4,%xmm6
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm9
	movdqa %xmm8,%xmm10
	movdqa %xmm8,%xmm11
	movdqa %xmm12,%xmm15
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,%xmm14
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,%xmm13
	paddd .sse_inc(%rip),%xmm12

	movdqa %xmm4,48(%rbp)
	movdqa %xmm8,64(%rbp)
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)
	movdqa %xmm14,128(%rbp)
	movdqa %xmm15,144(%rbp)
	movq $10,%r10
1:
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm4
	pxor %xmm8,%xmm4
	movdqa .rol8(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm4
	pxor %xmm8,%xmm4
	movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm4
	pxor %xmm8,%xmm4
	movdqa .rol8(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm4
	pxor %xmm8,%xmm4
	movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

	decq %r10
	jnz 1b
	paddd .chacha20_consts(%rip),%xmm3
	paddd 48(%rbp),%xmm7
	paddd 64(%rbp),%xmm11
	paddd 144(%rbp),%xmm15
	paddd .chacha20_consts(%rip),%xmm2
	paddd 48(%rbp),%xmm6
	paddd 64(%rbp),%xmm10
	paddd 128(%rbp),%xmm14
	paddd .chacha20_consts(%rip),%xmm1
	paddd 48(%rbp),%xmm5
	paddd 64(%rbp),%xmm9
	paddd 112(%rbp),%xmm13
	paddd .chacha20_consts(%rip),%xmm0
	paddd 48(%rbp),%xmm4
	paddd 64(%rbp),%xmm8
	paddd 96(%rbp),%xmm12

	pand .clamp(%rip),%xmm3
	movdqa %xmm3,0(%rbp)
	movdqa %xmm7,16(%rbp)

	movq %r8,%r8
	call poly_hash_ad_internal
	movdqu 0 + 0(%rsi),%xmm3
	movdqu 16 + 0(%rsi),%xmm7
	movdqu 32 + 0(%rsi),%xmm11
	movdqu 48 + 0(%rsi),%xmm15
	pxor %xmm3,%xmm2
	pxor %xmm7,%xmm6
	pxor %xmm11,%xmm10
	pxor %xmm14,%xmm15
	movdqu %xmm2,0 + 0(%rdi)
	movdqu %xmm6,16 + 0(%rdi)
	movdqu %xmm10,32 + 0(%rdi)
	movdqu %xmm15,48 + 0(%rdi)
	movdqu 0 + 64(%rsi),%xmm3
	movdqu 16 + 64(%rsi),%xmm7
	movdqu 32 + 64(%rsi),%xmm11
	movdqu 48 + 64(%rsi),%xmm15
	pxor %xmm3,%xmm1
	pxor %xmm7,%xmm5
	pxor %xmm11,%xmm9
	pxor %xmm13,%xmm15
	movdqu %xmm1,0 + 64(%rdi)
	movdqu %xmm5,16 + 64(%rdi)
	movdqu %xmm9,32 + 64(%rdi)
	movdqu %xmm15,48 + 64(%rdi)

	cmpq $192,%rbx
	ja 1f
	movq $128,%rcx
	subq $128,%rbx
	leaq 128(%rsi),%rsi
	jmp seal_sse_128_seal_hash
1:
	movdqu 0 + 128(%rsi),%xmm3
	movdqu 16 + 128(%rsi),%xmm7
	movdqu 32 + 128(%rsi),%xmm11
	movdqu 48 + 128(%rsi),%xmm15
	pxor %xmm3,%xmm0
	pxor %xmm7,%xmm4
	pxor %xmm11,%xmm8
	pxor %xmm12,%xmm15
	movdqu %xmm0,0 + 128(%rdi)
	movdqu %xmm4,16 + 128(%rdi)
	movdqu %xmm8,32 + 128(%rdi)
	movdqu %xmm15,48 + 128(%rdi)

	movq $192,%rcx
	subq $192,%rbx
	leaq 192(%rsi),%rsi
	movq $2,%rcx
	movq $8,%r8
	cmpq $64,%rbx
	jbe seal_sse_tail_64
	cmpq $128,%rbx
	jbe seal_sse_tail_128
	cmpq $192,%rbx
	jbe seal_sse_tail_192

1:
	movdqa .chacha20_consts(%rip),%xmm0
	movdqa 48(%rbp),%xmm4
	movdqa 64(%rbp),%xmm8
	movdqa %xmm0,%xmm1
	movdqa %xmm4,%xmm5
	movdqa %xmm8,%xmm9
	movdqa %xmm0,%xmm2
	movdqa %xmm4,%xmm6
	movdqa %xmm8,%xmm10
	movdqa %xmm0,%xmm3
	movdqa %xmm4,%xmm7
	movdqa %xmm8,%xmm11
	movdqa 96(%rbp),%xmm15
	paddd .sse_inc(%rip),%xmm15
	movdqa %xmm15,%xmm14
	paddd .sse_inc(%rip),%xmm14
	movdqa %xmm14,%xmm13
	paddd .sse_inc(%rip),%xmm13
	movdqa %xmm13,%xmm12
	paddd .sse_inc(%rip),%xmm12
	movdqa %xmm12,96(%rbp)
	movdqa %xmm13,112(%rbp)
	movdqa %xmm14,128(%rbp)
	movdqa %xmm15,144(%rbp)

2:
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm4
	pxor %xmm8,%xmm4
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movdqa .rol8(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm6
	pxor %xmm8,%xmm6
	movdqa %xmm5,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm5
	pxor %xmm8,%xmm5
	movdqa %xmm4,%xmm8
	psrld $25,%xmm8
	pslld $32-25,%xmm4
	pxor %xmm8,%xmm4
	movdqa 80(%rbp),%xmm8
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
.byte 102,15,58,15,255,4
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,12
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
	movdqa %xmm8,80(%rbp)
	movdqa .rol16(%rip),%xmm8
	paddd %xmm7,%xmm3
	paddd %xmm6,%xmm2
	paddd %xmm5,%xmm1
	paddd %xmm4,%xmm0
	pxor %xmm3,%xmm15
	pxor %xmm2,%xmm14
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	pxor %xmm1,%xmm13
	pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
	movdqa 80(%rbp),%xmm8
	paddd %xmm15,%xmm11
	paddd %xmm14,%xmm10
	paddd %xmm13,%xmm9
	paddd %xmm12,%xmm8
	pxor %xmm11,%xmm7
	pxor %xmm10,%xmm6
	pxor %xmm9,%xmm5
	pxor %xmm8,%xmm4
	movdqa %xmm8,80(%rbp)
	movdqa %xmm7,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm7
	pxor %xmm8,%xmm7
	movdqa %xmm6,%xmm8
	psrld $20,%xmm8
	pslld $32-20,%xmm6
	pxor %xmm8,%xmm6
 movdqa %xmm5,%xmm8
 psrld $20,%xmm8
 pslld $32-20,%xmm5
 pxor %xmm8,%xmm5
 movdqa %xmm4,%xmm8
 psrld $20,%xmm8
 pslld $32-20,%xmm4
 pxor %xmm8,%xmm4
 movdqa .rol8(%rip),%xmm8
 paddd %xmm7,%xmm3
 paddd %xmm6,%xmm2
 paddd %xmm5,%xmm1
 paddd %xmm4,%xmm0
 pxor %xmm3,%xmm15
 pxor %xmm2,%xmm14
 pxor %xmm1,%xmm13
 pxor %xmm0,%xmm12
.byte 102,69,15,56,0,248
.byte 102,69,15,56,0,240
.byte 102,69,15,56,0,232
.byte 102,69,15,56,0,224
 movdqa 80(%rbp),%xmm8
 paddd %xmm15,%xmm11
 paddd %xmm14,%xmm10
 paddd %xmm13,%xmm9
 paddd %xmm12,%xmm8
 pxor %xmm11,%xmm7
 pxor %xmm10,%xmm6
 pxor %xmm9,%xmm5
 pxor %xmm8,%xmm4
 movdqa %xmm8,80(%rbp)
 movdqa %xmm7,%xmm8
 psrld $25,%xmm8
 pslld $32-25,%xmm7
 pxor %xmm8,%xmm7
 movdqa %xmm6,%xmm8
 psrld $25,%xmm8
 pslld $32-25,%xmm6
 pxor %xmm8,%xmm6
 movdqa %xmm5,%xmm8
 psrld $25,%xmm8
 pslld $32-25,%xmm5
 pxor %xmm8,%xmm5
 movdqa %xmm4,%xmm8
 psrld $25,%xmm8
 pslld $32-25,%xmm4
 pxor %xmm8,%xmm4
 movdqa 80(%rbp),%xmm8
.byte 102,15,58,15,255,12
.byte 102,69,15,58,15,219,8
.byte 102,69,15,58,15,255,4
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4

 leaq 16(%rdi),%rdi
 decq %r8
 jge 2b
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
 decq %rcx
 jg 2b
 paddd .chacha20_consts(%rip),%xmm3
 paddd 48(%rbp),%xmm7
 paddd 64(%rbp),%xmm11
 paddd 144(%rbp),%xmm15
 paddd .chacha20_consts(%rip),%xmm2
 paddd 48(%rbp),%xmm6
 paddd 64(%rbp),%xmm10
 paddd 128(%rbp),%xmm14
 paddd .chacha20_consts(%rip),%xmm1
 paddd 48(%rbp),%xmm5
 paddd 64(%rbp),%xmm9
 paddd 112(%rbp),%xmm13
 paddd .chacha20_consts(%rip),%xmm0
 paddd 48(%rbp),%xmm4
 paddd 64(%rbp),%xmm8
 paddd 96(%rbp),%xmm12

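# Rounds done: the saved initial state has been added back into all
# four blocks; now XOR the first 192 bytes, spilling %xmm14 to
# 80(%rbp) so it can serve as scratch for the first 64 bytes.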
 movdqa %xmm14,80(%rbp)
 movdqa %xmm14,80(%rbp)
 movdqu 0 + 0(%rsi),%xmm14
 pxor %xmm3,%xmm14
 movdqu %xmm14,0 + 0(%rdi)
 movdqu 16 + 0(%rsi),%xmm14
 pxor %xmm7,%xmm14
 movdqu %xmm14,16 + 0(%rdi)
 movdqu 32 + 0(%rsi),%xmm14
 pxor %xmm11,%xmm14
 movdqu %xmm14,32 + 0(%rdi)
 movdqu 48 + 0(%rsi),%xmm14
 pxor %xmm15,%xmm14
 movdqu %xmm14,48 + 0(%rdi)

 movdqa 80(%rbp),%xmm14
 movdqu 0 + 64(%rsi),%xmm3
 movdqu 16 + 64(%rsi),%xmm7
 movdqu 32 + 64(%rsi),%xmm11
 movdqu 48 + 64(%rsi),%xmm15
 pxor %xmm3,%xmm2
 pxor %xmm7,%xmm6
 pxor %xmm11,%xmm10
 pxor %xmm14,%xmm15
 movdqu %xmm2,0 + 64(%rdi)
 movdqu %xmm6,16 + 64(%rdi)
 movdqu %xmm10,32 + 64(%rdi)
 movdqu %xmm15,48 + 64(%rdi)
 movdqu 0 + 128(%rsi),%xmm3
 movdqu 16 + 128(%rsi),%xmm7
 movdqu 32 + 128(%rsi),%xmm11
 movdqu 48 + 128(%rsi),%xmm15
 pxor %xmm3,%xmm1
 pxor %xmm7,%xmm5
 pxor %xmm11,%xmm9
 pxor %xmm13,%xmm15
 movdqu %xmm1,0 + 128(%rdi)
 movdqu %xmm5,16 + 128(%rdi)
 movdqu %xmm9,32 + 128(%rdi)
 movdqu %xmm15,48 + 128(%rdi)

 cmpq $256,%rbx
 ja 3f

 movq $192,%rcx
 subq $192,%rbx
 leaq 192(%rsi),%rsi
 jmp seal_sse_128_seal_hash
3:
 movdqu 0 + 192(%rsi),%xmm3
 movdqu 16 + 192(%rsi),%xmm7
 movdqu 32 + 192(%rsi),%xmm11
 movdqu 48 + 192(%rsi),%xmm15
 pxor %xmm3,%xmm0
 pxor %xmm7,%xmm4
 pxor %xmm11,%xmm8
 pxor %xmm12,%xmm15
 movdqu %xmm0,0 + 192(%rdi)
 movdqu %xmm4,16 + 192(%rdi)
 movdqu %xmm8,32 + 192(%rdi)
 movdqu %xmm15,48 + 192(%rdi)

 leaq 256(%rsi),%rsi
 subq $256,%rbx
 movq $6,%rcx
 movq $4,%r8
 cmpq $192,%rbx
 jg 1b
 movq %rbx,%rcx
 testq %rbx,%rbx
 je seal_sse_128_seal_hash
 movq $6,%rcx
 cmpq $64,%rbx
 jg 3f

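# Tail paths: one, two or three remaining 64-byte blocks. Each path
# keeps hashing 16 bytes of prior ciphertext per iteration for %rcx
# iterations, then runs %r8 more round iterations without hashing.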
seal_sse_tail_64:
 movdqa .chacha20_consts(%rip),%xmm0
 movdqa 48(%rbp),%xmm4
 movdqa 64(%rbp),%xmm8
 movdqa 96(%rbp),%xmm12
 paddd .sse_inc(%rip),%xmm12
 movdqa %xmm12,96(%rbp)

1:
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
2:
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
 decq %rcx
 jg 1b
 decq %r8
 jge 2b
 paddd .chacha20_consts(%rip),%xmm0
 paddd 48(%rbp),%xmm4
 paddd 64(%rbp),%xmm8
 paddd 96(%rbp),%xmm12

 jmp seal_sse_128_seal
3:
 cmpq $128,%rbx
 jg 3f

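# Two-block tail: blocks are generated newest-first, so the %xmm1
# group keys the first 64 output bytes here and the %xmm0 group is
# left for seal_sse_128_seal to drain.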
seal_sse_tail_128:
 movdqa .chacha20_consts(%rip),%xmm0
 movdqa 48(%rbp),%xmm4
 movdqa 64(%rbp),%xmm8
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm5
 movdqa %xmm8,%xmm9
 movdqa 96(%rbp),%xmm13
 paddd .sse_inc(%rip),%xmm13
 movdqa %xmm13,%xmm12
 paddd .sse_inc(%rip),%xmm12
 movdqa %xmm12,96(%rbp)
 movdqa %xmm13,112(%rbp)

1:
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
2:
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4

 leaq 16(%rdi),%rdi
 decq %rcx
 jg 1b
 decq %r8
 jge 2b
 paddd .chacha20_consts(%rip),%xmm1
 paddd 48(%rbp),%xmm5
 paddd 64(%rbp),%xmm9
 paddd 112(%rbp),%xmm13
 paddd .chacha20_consts(%rip),%xmm0
 paddd 48(%rbp),%xmm4
 paddd 64(%rbp),%xmm8
 paddd 96(%rbp),%xmm12
 movdqu 0 + 0(%rsi),%xmm3
 movdqu 16 + 0(%rsi),%xmm7
 movdqu 32 + 0(%rsi),%xmm11
 movdqu 48 + 0(%rsi),%xmm15
 pxor %xmm3,%xmm1
 pxor %xmm7,%xmm5
 pxor %xmm11,%xmm9
 pxor %xmm13,%xmm15
 movdqu %xmm1,0 + 0(%rdi)
 movdqu %xmm5,16 + 0(%rdi)
 movdqu %xmm9,32 + 0(%rdi)
 movdqu %xmm15,48 + 0(%rdi)

 movq $64,%rcx
 subq $64,%rbx
 leaq 64(%rsi),%rsi
 jmp seal_sse_128_seal_hash
3:

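# Three-block tail: counters ctr+1..ctr+3; the first 128 bytes are
# written here and the last block is finished in seal_sse_128_seal.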
seal_sse_tail_192:
 movdqa .chacha20_consts(%rip),%xmm0
 movdqa 48(%rbp),%xmm4
 movdqa 64(%rbp),%xmm8
 movdqa %xmm0,%xmm1
 movdqa %xmm4,%xmm5
 movdqa %xmm8,%xmm9
 movdqa %xmm0,%xmm2
 movdqa %xmm4,%xmm6
 movdqa %xmm8,%xmm10
 movdqa 96(%rbp),%xmm14
 paddd .sse_inc(%rip),%xmm14
 movdqa %xmm14,%xmm13
 paddd .sse_inc(%rip),%xmm13
 movdqa %xmm13,%xmm12
 paddd .sse_inc(%rip),%xmm12
 movdqa %xmm12,96(%rbp)
 movdqa %xmm13,112(%rbp)
 movdqa %xmm14,128(%rbp)

1:
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rdi),%rdi
2:
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol16(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm6
 pxor %xmm3,%xmm6
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol8(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm6
 pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol16(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm6
 pxor %xmm3,%xmm6
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol8(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm6
 pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4

 leaq 16(%rdi),%rdi
 decq %rcx
 jg 1b
 decq %r8
 jge 2b
 paddd .chacha20_consts(%rip),%xmm2
 paddd 48(%rbp),%xmm6
 paddd 64(%rbp),%xmm10
 paddd 128(%rbp),%xmm14
 paddd .chacha20_consts(%rip),%xmm1
 paddd 48(%rbp),%xmm5
 paddd 64(%rbp),%xmm9
 paddd 112(%rbp),%xmm13
 paddd .chacha20_consts(%rip),%xmm0
 paddd 48(%rbp),%xmm4
 paddd 64(%rbp),%xmm8
 paddd 96(%rbp),%xmm12
 movdqu 0 + 0(%rsi),%xmm3
 movdqu 16 + 0(%rsi),%xmm7
 movdqu 32 + 0(%rsi),%xmm11
 movdqu 48 + 0(%rsi),%xmm15
 pxor %xmm3,%xmm2
 pxor %xmm7,%xmm6
 pxor %xmm11,%xmm10
 pxor %xmm14,%xmm15
 movdqu %xmm2,0 + 0(%rdi)
 movdqu %xmm6,16 + 0(%rdi)
 movdqu %xmm10,32 + 0(%rdi)
 movdqu %xmm15,48 + 0(%rdi)
 movdqu 0 + 64(%rsi),%xmm3
 movdqu 16 + 64(%rsi),%xmm7
 movdqu 32 + 64(%rsi),%xmm11
 movdqu 48 + 64(%rsi),%xmm15
 pxor %xmm3,%xmm1
 pxor %xmm7,%xmm5
 pxor %xmm11,%xmm9
 pxor %xmm13,%xmm15
 movdqu %xmm1,0 + 64(%rdi)
 movdqu %xmm5,16 + 64(%rdi)
 movdqu %xmm9,32 + 64(%rdi)
 movdqu %xmm15,48 + 64(%rdi)

 movq $128,%rcx
 subq $128,%rbx
 leaq 128(%rsi),%rsi

seal_sse_128_seal_hash:
 cmpq $16,%rcx
 jb seal_sse_128_seal
 addq 0(%rdi),%r10
 adcq 8+0(%rdi),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 subq $16,%rcx
 leaq 16(%rdi),%rdi
 jmp seal_sse_128_seal_hash

seal_sse_128_seal:
 cmpq $16,%rbx
 jb seal_sse_tail_16
 subq $16,%rbx

 movdqu 0(%rsi),%xmm3
 pxor %xmm3,%xmm0
 movdqu %xmm0,0(%rdi)

 addq 0(%rdi),%r10
 adcq 8(%rdi),%r11
 adcq $1,%r12
 leaq 16(%rsi),%rsi
 leaq 16(%rdi),%rdi
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12


 movdqa %xmm4,%xmm0
 movdqa %xmm8,%xmm4
 movdqa %xmm12,%xmm8
 movdqa %xmm1,%xmm12
 movdqa %xmm5,%xmm1
 movdqa %xmm9,%xmm5
 movdqa %xmm13,%xmm9
 jmp seal_sse_128_seal

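# Fewer than 16 bytes remain: gather them back-to-front into %xmm15,
# encrypt, emit byte-by-byte, then mask with .and_masks and absorb the
# padded final block into the Poly1305 state.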
seal_sse_tail_16:
 testq %rbx,%rbx
 jz seal_sse_finalize

 movq %rbx,%r8
 shlq $4,%r8
 leaq .and_masks(%rip),%r13
 movq %rbx,%rcx
 leaq -1(%rsi,%rbx), %rsi
 pxor %xmm15,%xmm15
1:
 pslldq $1,%xmm15
 pinsrb $0,(%rsi),%xmm15
 leaq -1(%rsi),%rsi
 decq %rcx
 jne 1b


 pxor %xmm0,%xmm15


 movq %rbx,%rcx
 movdqu %xmm15,%xmm0
2:
 pextrb $0,%xmm0,(%rdi)
 psrldq $1,%xmm0
 addq $1,%rdi
 subq $1,%rcx
 jnz 2b

 pand -16(%r13,%r8), %xmm15
.byte 102,77,15,126,253
 pextrq $1,%xmm15,%r14
 addq %r13,%r10
 adcq %r14,%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

seal_sse_finalize:
 addq 32(%rbp),%r10
 adcq 8+32(%rbp),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12


 movq %r10,%r13
 movq %r11,%r14
 movq %r12,%r15
 subq $-5,%r10
 sbbq $-1,%r11
 sbbq $3,%r12
 cmovcq %r13,%r10
 cmovcq %r14,%r11
 cmovcq %r15,%r12

 addq 0+16(%rbp),%r10
 adcq 8+16(%rbp),%r11

 addq $288 + 32,%rsp
.cfi_adjust_cfa_offset -(288 + 32)
 popq %r9
.cfi_adjust_cfa_offset -8
 movq %r10,0(%r9)
 movq %r11,8(%r9)

 popq %r15
.cfi_adjust_cfa_offset -8
 popq %r14
.cfi_adjust_cfa_offset -8
 popq %r13
.cfi_adjust_cfa_offset -8
 popq %r12
.cfi_adjust_cfa_offset -8
 popq %rbx
.cfi_adjust_cfa_offset -8
 popq %rbp
.cfi_adjust_cfa_offset -8
 .byte 0xf3,0xc3
.cfi_adjust_cfa_offset (8 * 6) + 288 + 32

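# Short-input path (at most 128 bytes): three blocks are expanded in
# registers only; the counter-0 block supplies just the 32-byte
# Poly1305 key (%xmm2/%xmm6) and is never used as keystream.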
seal_sse_128:
 movdqu .chacha20_consts(%rip),%xmm0
 movdqa %xmm0,%xmm1
 movdqa %xmm0,%xmm2
 movdqu 0(%r9),%xmm4
 movdqa %xmm4,%xmm5
 movdqa %xmm4,%xmm6
 movdqu 16(%r9),%xmm8
 movdqa %xmm8,%xmm9
 movdqa %xmm8,%xmm10
 movdqu 32(%r9),%xmm14
 movdqa %xmm14,%xmm12
 paddd .sse_inc(%rip),%xmm12
 movdqa %xmm12,%xmm13
 paddd .sse_inc(%rip),%xmm13
 movdqa %xmm4,%xmm7
 movdqa %xmm8,%xmm11
 movdqa %xmm12,%xmm15
 movq $10,%r10
1:
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,4
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,12
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,4
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,12
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol16(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm6
 pxor %xmm3,%xmm6
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol8(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm6
 pxor %xmm3,%xmm6
.byte 102,15,58,15,246,4
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,12
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol16(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm4
 pxor %xmm3,%xmm4
 paddd %xmm4,%xmm0
 pxor %xmm0,%xmm12
 pshufb .rol8(%rip),%xmm12
 paddd %xmm12,%xmm8
 pxor %xmm8,%xmm4
 movdqa %xmm4,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm4
 pxor %xmm3,%xmm4
.byte 102,15,58,15,228,12
.byte 102,69,15,58,15,192,8
.byte 102,69,15,58,15,228,4
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol16(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm5
 pxor %xmm3,%xmm5
 paddd %xmm5,%xmm1
 pxor %xmm1,%xmm13
 pshufb .rol8(%rip),%xmm13
 paddd %xmm13,%xmm9
 pxor %xmm9,%xmm5
 movdqa %xmm5,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm5
 pxor %xmm3,%xmm5
.byte 102,15,58,15,237,12
.byte 102,69,15,58,15,201,8
.byte 102,69,15,58,15,237,4
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol16(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $12,%xmm3
 psrld $20,%xmm6
 pxor %xmm3,%xmm6
 paddd %xmm6,%xmm2
 pxor %xmm2,%xmm14
 pshufb .rol8(%rip),%xmm14
 paddd %xmm14,%xmm10
 pxor %xmm10,%xmm6
 movdqa %xmm6,%xmm3
 pslld $7,%xmm3
 psrld $25,%xmm6
 pxor %xmm3,%xmm6
.byte 102,15,58,15,246,12
.byte 102,69,15,58,15,210,8
.byte 102,69,15,58,15,246,4

 decq %r10
 jnz 1b
 paddd .chacha20_consts(%rip),%xmm0
 paddd .chacha20_consts(%rip),%xmm1
 paddd .chacha20_consts(%rip),%xmm2
 paddd %xmm7,%xmm4
 paddd %xmm7,%xmm5
 paddd %xmm7,%xmm6
 paddd %xmm11,%xmm8
 paddd %xmm11,%xmm9
 paddd %xmm15,%xmm12
 paddd .sse_inc(%rip),%xmm15
 paddd %xmm15,%xmm13

 pand .clamp(%rip),%xmm2
 movdqa %xmm2,0(%rbp)
 movdqa %xmm6,16(%rbp)

 movq %r8,%r8
 call poly_hash_ad_internal
 jmp seal_sse_128_seal
.size chacha20_poly1305_seal, .-chacha20_poly1305_seal


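# AVX2 open path: each %ymm group carries two ChaCha20 blocks, one per
# 128-bit lane; the first 64-byte block doubles as the Poly1305 key
# derivation, mirroring the SSE path above.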
.type chacha20_poly1305_open_avx2,@function
.align 64
chacha20_poly1305_open_avx2:
 vzeroupper
 vmovdqa .chacha20_consts(%rip),%ymm0
 vbroadcasti128 0(%r9),%ymm4
 vbroadcasti128 16(%r9),%ymm8
 vbroadcasti128 32(%r9),%ymm12
 vpaddd .avx2_init(%rip),%ymm12,%ymm12
 cmpq $192,%rbx
 jbe open_avx2_192
 cmpq $320,%rbx
 jbe open_avx2_320

 vmovdqa %ymm4,64(%rbp)
 vmovdqa %ymm8,96(%rbp)
 vmovdqa %ymm12,160(%rbp)
 movq $10,%r10
1:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4

 decq %r10
 jne 1b
 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 64(%rbp),%ymm4,%ymm4
 vpaddd 96(%rbp),%ymm8,%ymm8
 vpaddd 160(%rbp),%ymm12,%ymm12

 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

 vpand .clamp(%rip),%ymm3,%ymm3
 vmovdqa %ymm3,0(%rbp)

 vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm4

 movq %r8,%r8
 call poly_hash_ad_internal
 xorq %rcx,%rcx

1:
 addq 0(%rsi,%rcx), %r10
 adcq 8+0(%rsi,%rcx), %r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 addq $16,%rcx
 cmpq $64,%rcx
 jne 1b

 vpxor 0(%rsi),%ymm0,%ymm0
 vpxor 32(%rsi),%ymm4,%ymm4
 vmovdqu %ymm0,0(%rdi)
 vmovdqu %ymm4,32(%rdi)
 leaq 64(%rsi),%rsi
 leaq 64(%rdi),%rdi
 subq $64,%rbx
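# Bulk open loop: 512 bytes per iteration (four 2-block %ymm groups),
# folding one Poly1305 block in per 16 bytes of ciphertext read from
# (%rsi); the Poly1305 math here uses mulx (BMI2).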
1:

 cmpq $512,%rbx
 jb 3f
 vmovdqa .chacha20_consts(%rip),%ymm0
 vmovdqa 64(%rbp),%ymm4
 vmovdqa 96(%rbp),%ymm8
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm10
 vmovdqa %ymm0,%ymm3
 vmovdqa %ymm4,%ymm7
 vmovdqa %ymm8,%ymm11
 vmovdqa .avx2_inc(%rip),%ymm12
 vpaddd 160(%rbp),%ymm12,%ymm15
 vpaddd %ymm15,%ymm12,%ymm14
 vpaddd %ymm14,%ymm12,%ymm13
 vpaddd %ymm13,%ymm12,%ymm12
 vmovdqa %ymm15,256(%rbp)
 vmovdqa %ymm14,224(%rbp)
 vmovdqa %ymm13,192(%rbp)
 vmovdqa %ymm12,160(%rbp)

 xorq %rcx,%rcx
2:
 addq 0*8(%rsi,%rcx), %r10
 adcq 8+0*8(%rsi,%rcx), %r11
 adcq $1,%r12
 vmovdqa %ymm8,128(%rbp)
 vmovdqa .rol16(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vmovdqa 128(%rbp),%ymm8
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd %ymm12,%ymm8,%ymm8
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,128(%rbp)
 vpsrld $20,%ymm7,%ymm8
 vpslld $32-20,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $20,%ymm6,%ymm8
 vpslld $32-20,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $20,%ymm5,%ymm8
 addq %rax,%r15
 adcq %rdx,%r9
 vpslld $32-20,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $20,%ymm4,%ymm8
 vpslld $32-20,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa .rol8(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vmovdqa 128(%rbp),%ymm8
 addq 2*8(%rsi,%rcx), %r10
 adcq 8+2*8(%rsi,%rcx), %r11
 adcq $1,%r12
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 vmovdqa %ymm8,128(%rbp)
 vpsrld $25,%ymm7,%ymm8
 vpslld $32-25,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $25,%ymm6,%ymm8
 vpslld $32-25,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $25,%ymm5,%ymm8
 vpslld $32-25,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $25,%ymm4,%ymm8
 vpslld $32-25,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa 128(%rbp),%ymm8
 vpalignr $4,%ymm7,%ymm7,%ymm7
 vpalignr $8,%ymm11,%ymm11,%ymm11
 vpalignr $12,%ymm15,%ymm15,%ymm15
 vpalignr $4,%ymm6,%ymm6,%ymm6
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $12,%ymm14,%ymm14,%ymm14
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vmovdqa %ymm8,128(%rbp)
 vmovdqa .rol16(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 addq %rax,%r15
 adcq %rdx,%r9
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vmovdqa 128(%rbp),%ymm8
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 vpaddd %ymm12,%ymm8,%ymm8
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,128(%rbp)
 vpsrld $20,%ymm7,%ymm8
 vpslld $32-20,%ymm7,%ymm7
 vpxor %ymm8,%ymm7,%ymm7
 addq 4*8(%rsi,%rcx), %r10
 adcq 8+4*8(%rsi,%rcx), %r11
 adcq $1,%r12

 leaq 48(%rcx),%rcx
 vpsrld $20,%ymm6,%ymm8
 vpslld $32-20,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $20,%ymm5,%ymm8
 vpslld $32-20,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $20,%ymm4,%ymm8
 vpslld $32-20,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa .rol8(%rip),%ymm8
 vpaddd %ymm7,%ymm3,%ymm3
 vpaddd %ymm6,%ymm2,%ymm2
 vpaddd %ymm5,%ymm1,%ymm1
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm3,%ymm15,%ymm15
 vpxor %ymm2,%ymm14,%ymm14
 vpxor %ymm1,%ymm13,%ymm13
 vpxor %ymm0,%ymm12,%ymm12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 vpshufb %ymm8,%ymm15,%ymm15
 vpshufb %ymm8,%ymm14,%ymm14
 vpshufb %ymm8,%ymm13,%ymm13
 vpshufb %ymm8,%ymm12,%ymm12
 vmovdqa 128(%rbp),%ymm8
 vpaddd %ymm15,%ymm11,%ymm11
 vpaddd %ymm14,%ymm10,%ymm10
 vpaddd %ymm13,%ymm9,%ymm9
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm11,%ymm7,%ymm7
 vpxor %ymm10,%ymm6,%ymm6
 vpxor %ymm9,%ymm5,%ymm5
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa %ymm8,128(%rbp)
 vpsrld $25,%ymm7,%ymm8
 vpslld $32-25,%ymm7,%ymm7
 addq %rax,%r15
 adcq %rdx,%r9
 vpxor %ymm8,%ymm7,%ymm7
 vpsrld $25,%ymm6,%ymm8
 vpslld $32-25,%ymm6,%ymm6
 vpxor %ymm8,%ymm6,%ymm6
 vpsrld $25,%ymm5,%ymm8
 vpslld $32-25,%ymm5,%ymm5
 vpxor %ymm8,%ymm5,%ymm5
 vpsrld $25,%ymm4,%ymm8
 vpslld $32-25,%ymm4,%ymm4
 vpxor %ymm8,%ymm4,%ymm4
 vmovdqa 128(%rbp),%ymm8
 vpalignr $12,%ymm7,%ymm7,%ymm7
 vpalignr $8,%ymm11,%ymm11,%ymm11
 vpalignr $4,%ymm15,%ymm15,%ymm15
 vpalignr $12,%ymm6,%ymm6,%ymm6
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $4,%ymm14,%ymm14,%ymm14
 vpalignr $12,%ymm5,%ymm5,%ymm5
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm12,%ymm12,%ymm12

 cmpq $60*8,%rcx
 jne 2b
 vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
 vpaddd 64(%rbp),%ymm7,%ymm7
 vpaddd 96(%rbp),%ymm11,%ymm11
 vpaddd 256(%rbp),%ymm15,%ymm15
 vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
 vpaddd 64(%rbp),%ymm6,%ymm6
 vpaddd 96(%rbp),%ymm10,%ymm10
 vpaddd 224(%rbp),%ymm14,%ymm14
 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
 vpaddd 64(%rbp),%ymm5,%ymm5
 vpaddd 96(%rbp),%ymm9,%ymm9
 vpaddd 192(%rbp),%ymm13,%ymm13
 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 64(%rbp),%ymm4,%ymm4
 vpaddd 96(%rbp),%ymm8,%ymm8
 vpaddd 160(%rbp),%ymm12,%ymm12

 vmovdqa %ymm0,128(%rbp)
 addq 60*8(%rsi),%r10
 adcq 8+60*8(%rsi),%r11
 adcq $1,%r12
 vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
 vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
 vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
 vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
 vpxor 0+0(%rsi),%ymm0,%ymm0
 vpxor 32+0(%rsi),%ymm3,%ymm3
 vpxor 64+0(%rsi),%ymm7,%ymm7
 vpxor 96+0(%rsi),%ymm11,%ymm11
 vmovdqu %ymm0,0+0(%rdi)
 vmovdqu %ymm3,32+0(%rdi)
 vmovdqu %ymm7,64+0(%rdi)
 vmovdqu %ymm11,96+0(%rdi)

 vmovdqa 128(%rbp),%ymm0
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
 vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
 vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
 vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
 vpxor 0+128(%rsi),%ymm3,%ymm3
 vpxor 32+128(%rsi),%ymm2,%ymm2
 vpxor 64+128(%rsi),%ymm6,%ymm6
 vpxor 96+128(%rsi),%ymm10,%ymm10
 vmovdqu %ymm3,0+128(%rdi)
 vmovdqu %ymm2,32+128(%rdi)
 vmovdqu %ymm6,64+128(%rdi)
 vmovdqu %ymm10,96+128(%rdi)
 addq 60*8+16(%rsi),%r10
 adcq 8+60*8+16(%rsi),%r11
 adcq $1,%r12
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
 vpxor 0+256(%rsi),%ymm3,%ymm3
 vpxor 32+256(%rsi),%ymm1,%ymm1
 vpxor 64+256(%rsi),%ymm5,%ymm5
 vpxor 96+256(%rsi),%ymm9,%ymm9
 vmovdqu %ymm3,0+256(%rdi)
 vmovdqu %ymm1,32+256(%rdi)
 vmovdqu %ymm5,64+256(%rdi)
 vmovdqu %ymm9,96+256(%rdi)
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
 vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
 vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
 vpxor 0+384(%rsi),%ymm3,%ymm3
 vpxor 32+384(%rsi),%ymm0,%ymm0
 vpxor 64+384(%rsi),%ymm4,%ymm4
 vpxor 96+384(%rsi),%ymm8,%ymm8
 vmovdqu %ymm3,0+384(%rdi)
 vmovdqu %ymm0,32+384(%rdi)
 vmovdqu %ymm4,64+384(%rdi)
 vmovdqu %ymm8,96+384(%rdi)

 leaq 512(%rsi),%rsi
 leaq 512(%rdi),%rdi
 subq $512,%rbx
 jmp 1b
3:
 testq %rbx,%rbx
 vzeroupper
 je open_sse_finalize
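# Less than 512 bytes left: dispatch to a tail sized for 128, 256,
# 384, or one final full four-block pass, then drain through
# open_avx2_tail_loop.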
3:
 cmpq $128,%rbx
 ja 3f
 vmovdqa .chacha20_consts(%rip),%ymm0
 vmovdqa 64(%rbp),%ymm4
 vmovdqa 96(%rbp),%ymm8
 vmovdqa .avx2_inc(%rip),%ymm12
 vpaddd 160(%rbp),%ymm12,%ymm12
 vmovdqa %ymm12,160(%rbp)

 xorq %r8,%r8
 movq %rbx,%rcx
 andq $-16,%rcx
 testq %rcx,%rcx
 je 2f
1:
 addq 0*8(%rsi,%r8), %r10
 adcq 8+0*8(%rsi,%r8), %r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

2:
 addq $16,%r8
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4

 cmpq %rcx,%r8
 jb 1b
 cmpq $160,%r8
 jne 2b
 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 64(%rbp),%ymm4,%ymm4
 vpaddd 96(%rbp),%ymm8,%ymm8
 vpaddd 160(%rbp),%ymm12,%ymm12
 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
 vmovdqa %ymm3,%ymm8

 jmp open_avx2_tail_loop
3:
 cmpq $256,%rbx
 ja 3f
 vmovdqa .chacha20_consts(%rip),%ymm0
 vmovdqa 64(%rbp),%ymm4
 vmovdqa 96(%rbp),%ymm8
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm8,%ymm9
 vmovdqa .avx2_inc(%rip),%ymm12
 vpaddd 160(%rbp),%ymm12,%ymm13
 vpaddd %ymm13,%ymm12,%ymm12
 vmovdqa %ymm12,160(%rbp)
 vmovdqa %ymm13,192(%rbp)

 movq %rbx,128(%rbp)
 movq %rbx,%rcx
 subq $128,%rcx
 shrq $4,%rcx
 movq $10,%r8
 cmpq $10,%rcx
 cmovgq %r8,%rcx
 movq %rsi,%rbx
 xorq %r8,%r8
1:
 addq 0(%rbx),%r10
 adcq 8+0(%rbx),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 addq %rax,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rbx),%rbx
2:
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5

 incq %r8
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm5,%ymm5,%ymm5
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpsrld $20,%ymm6,%ymm3
 vpslld $12,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol8(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpslld $7,%ymm6,%ymm3
 vpsrld $25,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpalignr $4,%ymm14,%ymm14,%ymm14
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $12,%ymm6,%ymm6,%ymm6

 cmpq %rcx,%r8
 jb 1b
 cmpq $10,%r8
 jne 2b
 movq %rbx,%r8
 subq %rsi,%rbx
 movq %rbx,%rcx
 movq 128(%rbp),%rbx
1:
 addq $16,%rcx
 cmpq %rbx,%rcx
 jg 1f
 addq 0(%r8),%r10
 adcq 8+0(%r8),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 addq %rax,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%r8),%r8
 jmp 1b
1:
 vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
 vpaddd 64(%rbp),%ymm5,%ymm5
 vpaddd 96(%rbp),%ymm9,%ymm9
 vpaddd 192(%rbp),%ymm13,%ymm13
 vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
 vpaddd 64(%rbp),%ymm4,%ymm4
 vpaddd 96(%rbp),%ymm8,%ymm8
 vpaddd 160(%rbp),%ymm12,%ymm12
 vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
 vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
 vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
 vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
 vpxor 0+0(%rsi),%ymm3,%ymm3
 vpxor 32+0(%rsi),%ymm1,%ymm1
 vpxor 64+0(%rsi),%ymm5,%ymm5
 vpxor 96+0(%rsi),%ymm9,%ymm9
 vmovdqu %ymm3,0+0(%rdi)
 vmovdqu %ymm1,32+0(%rdi)
 vmovdqu %ymm5,64+0(%rdi)
 vmovdqu %ymm9,96+0(%rdi)
 vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
 vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
 vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
 vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
 vmovdqa %ymm3,%ymm8

 leaq 128(%rsi),%rsi
 leaq 128(%rdi),%rdi
 subq $128,%rbx
 jmp open_avx2_tail_loop
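# 256 < remaining <= 384: three 2-block groups; the number of hashed
# 16-byte chunks is capped so Poly1305 never reads past the input,
# with any leftover chunks hashed after the rounds finish.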
3:
 cmpq $384,%rbx
 ja 3f
 vmovdqa .chacha20_consts(%rip),%ymm0
 vmovdqa 64(%rbp),%ymm4
 vmovdqa 96(%rbp),%ymm8
 vmovdqa %ymm0,%ymm1
 vmovdqa %ymm4,%ymm5
 vmovdqa %ymm8,%ymm9
 vmovdqa %ymm0,%ymm2
 vmovdqa %ymm4,%ymm6
 vmovdqa %ymm8,%ymm10
 vmovdqa .avx2_inc(%rip),%ymm12
 vpaddd 160(%rbp),%ymm12,%ymm14
 vpaddd %ymm14,%ymm12,%ymm13
 vpaddd %ymm13,%ymm12,%ymm12
 vmovdqa %ymm12,160(%rbp)
 vmovdqa %ymm13,192(%rbp)
 vmovdqa %ymm14,224(%rbp)

 movq %rbx,128(%rbp)
 movq %rbx,%rcx
 subq $256,%rcx
 shrq $4,%rcx
 addq $6,%rcx
 movq $10,%r8
 cmpq $10,%rcx
 cmovgq %r8,%rcx
 movq %rsi,%rbx
 xorq %r8,%r8
1:
 addq 0(%rbx),%r10
 adcq 8+0(%rbx),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 addq %rax,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rbx),%rbx
2:
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpsrld $20,%ymm6,%ymm3
 vpslld $12,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol8(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpslld $7,%ymm6,%ymm3
 vpsrld $25,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpalignr $12,%ymm14,%ymm14,%ymm14
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $4,%ymm6,%ymm6,%ymm6
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $12,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $4,%ymm5,%ymm5,%ymm5
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $12,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $4,%ymm4,%ymm4,%ymm4
 addq 0(%rbx),%r10
 adcq 8+0(%rbx),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rax
 movq %rax,%r15
 mulq %r10
 movq %rax,%r13
 movq %rdx,%r14
 movq 0+0(%rbp),%rax
 mulq %r11
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rax
 movq %rax,%r9
 mulq %r10
 addq %rax,%r14
 adcq $0,%rdx
 movq %rdx,%r10
 movq 8+0(%rbp),%rax
 mulq %r11
 addq %rax,%r15
 adcq $0,%rdx
 imulq %r12,%r9
 addq %r10,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10
 adcq %r9,%r11
 adcq $0,%r12

 leaq 16(%rbx),%rbx
 incq %r8
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol16(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpsrld $20,%ymm6,%ymm3
 vpslld $12,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpaddd %ymm6,%ymm2,%ymm2
 vpxor %ymm2,%ymm14,%ymm14
 vpshufb .rol8(%rip),%ymm14,%ymm14
 vpaddd %ymm14,%ymm10,%ymm10
 vpxor %ymm10,%ymm6,%ymm6
 vpslld $7,%ymm6,%ymm3
 vpsrld $25,%ymm6,%ymm6
 vpxor %ymm3,%ymm6,%ymm6
 vpalignr $4,%ymm14,%ymm14,%ymm14
 vpalignr $8,%ymm10,%ymm10,%ymm10
 vpalignr $12,%ymm6,%ymm6,%ymm6
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol16(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpsrld $20,%ymm5,%ymm3
 vpslld $12,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpaddd %ymm5,%ymm1,%ymm1
 vpxor %ymm1,%ymm13,%ymm13
 vpshufb .rol8(%rip),%ymm13,%ymm13
 vpaddd %ymm13,%ymm9,%ymm9
 vpxor %ymm9,%ymm5,%ymm5
 vpslld $7,%ymm5,%ymm3
 vpsrld $25,%ymm5,%ymm5
 vpxor %ymm3,%ymm5,%ymm5
 vpalignr $4,%ymm13,%ymm13,%ymm13
 vpalignr $8,%ymm9,%ymm9,%ymm9
 vpalignr $12,%ymm5,%ymm5,%ymm5
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol16(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpsrld $20,%ymm4,%ymm3
 vpslld $12,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpaddd %ymm4,%ymm0,%ymm0
 vpxor %ymm0,%ymm12,%ymm12
 vpshufb .rol8(%rip),%ymm12,%ymm12
 vpaddd %ymm12,%ymm8,%ymm8
 vpxor %ymm8,%ymm4,%ymm4
 vpslld $7,%ymm4,%ymm3
 vpsrld $25,%ymm4,%ymm4
 vpxor %ymm3,%ymm4,%ymm4
 vpalignr $4,%ymm12,%ymm12,%ymm12
 vpalignr $8,%ymm8,%ymm8,%ymm8
 vpalignr $12,%ymm4,%ymm4,%ymm4

 cmpq %rcx,%r8
 jb 1b
 cmpq $10,%r8
 jne 2b
 movq %rbx,%r8
 subq %rsi,%rbx
 movq %rbx,%rcx
 movq 128(%rbp),%rbx
1:
 addq $16,%rcx
 cmpq %rbx,%rcx
 jg 1f
 addq 0(%r8),%r10
 adcq 8+0(%r8),%r11
 adcq $1,%r12
 movq 0+0(%rbp),%rdx
 movq %rdx,%r15
 mulxq %r10,%r13,%r14
 mulxq %r11,%rax,%rdx
 imulq %r12,%r15
 addq %rax,%r14
 adcq %rdx,%r15
 movq 8+0(%rbp),%rdx
 mulxq %r10,%r10,%rax
 addq %r10,%r14
 mulxq %r11,%r11,%r9
 adcq %r11,%r15
 adcq $0,%r9
 imulq %r12,%rdx
 addq %rax,%r15
 adcq %rdx,%r9
 movq %r13,%r10
 movq %r14,%r11
 movq %r15,%r12
 andq $3,%r12
 movq %r15,%r13
 andq $-4,%r13
 movq %r9,%r14
 shrdq $2,%r9,%r15
 shrq $2,%r9
 addq %r13,%r10
 adcq %r14,%r11
 adcq $0,%r12
 addq %r15,%r10

	leaq 16(%r8),%r8
	jmp 1b
1:
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+0(%rsi),%ymm3,%ymm3
	vpxor 32+0(%rsi),%ymm2,%ymm2
	vpxor 64+0(%rsi),%ymm6,%ymm6
	vpxor 96+0(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+0(%rdi)
	vmovdqu %ymm2,32+0(%rdi)
	vmovdqu %ymm6,64+0(%rdi)
	vmovdqu %ymm10,96+0(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm1,%ymm1
	vpxor 64+128(%rsi),%ymm5,%ymm5
	vpxor 96+128(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm1,32+128(%rdi)
	vmovdqu %ymm5,64+128(%rdi)
	vmovdqu %ymm9,96+128(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	leaq 256(%rsi),%rsi
	leaq 256(%rdi),%rdi
	subq $256,%rbx
	jmp open_avx2_tail_loop
3:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,256(%rbp)
	vmovdqa %ymm14,224(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm12,160(%rbp)

	xorq %rcx,%rcx
	movq %rsi,%r8
1:
	addq 0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%r8),%r8
2:
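// Four interleaved block pairs leave no spare ymm register, so ymm8 is
// spilled to 128(%rbp) whenever the rotation constants are needed.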
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	addq 0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vmovdqa %ymm8,128(%rbp)
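// Second Poly1305 block of this iteration; the four-block tail absorbs
// two 16-byte blocks per double round.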
	addq 16(%r8),%r10
	adcq 8+16(%r8),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%r8),%r8
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $12,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $4,%ymm15,%ymm15,%ymm15
	vpalignr $12,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm12,%ymm12,%ymm12

	incq %rcx
	cmpq $4,%rcx
	jl 1b
	cmpq $10,%rcx
	jne 2b
	movq %rbx,%rcx
	subq $384,%rcx
	andq $-16,%rcx
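// %rcx now holds the tail bytes beyond 384, rounded down to whole 16-byte
// blocks; absorb them before producing any output.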
1:
	testq %rcx,%rcx
	je 1f
	addq 0(%r8),%r10
	adcq 8+0(%r8),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%r8),%r8
	subq $16,%rcx
	jmp 1b
1:
	vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd 64(%rbp),%ymm7,%ymm7
	vpaddd 96(%rbp),%ymm11,%ymm11
	vpaddd 256(%rbp),%ymm15,%ymm15
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12

	vmovdqa %ymm0,128(%rbp)
	vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
	vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
	vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
	vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
	vpxor 0+0(%rsi),%ymm0,%ymm0
	vpxor 32+0(%rsi),%ymm3,%ymm3
	vpxor 64+0(%rsi),%ymm7,%ymm7
	vpxor 96+0(%rsi),%ymm11,%ymm11
	vmovdqu %ymm0,0+0(%rdi)
	vmovdqu %ymm3,32+0(%rdi)
	vmovdqu %ymm7,64+0(%rdi)
	vmovdqu %ymm11,96+0(%rdi)

	vmovdqa 128(%rbp),%ymm0
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm2,%ymm2
	vpxor 64+128(%rsi),%ymm6,%ymm6
	vpxor 96+128(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm2,32+128(%rdi)
	vmovdqu %ymm6,64+128(%rdi)
	vmovdqu %ymm10,96+128(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+256(%rsi),%ymm3,%ymm3
	vpxor 32+256(%rsi),%ymm1,%ymm1
	vpxor 64+256(%rsi),%ymm5,%ymm5
	vpxor 96+256(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+256(%rdi)
	vmovdqu %ymm1,32+256(%rdi)
	vmovdqu %ymm5,64+256(%rdi)
	vmovdqu %ymm9,96+256(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	leaq 384(%rsi),%rsi
	leaq 384(%rdi),%rdi
	subq $384,%rbx
open_avx2_tail_loop:
	cmpq $32,%rbx
	jb open_avx2_tail
	subq $32,%rbx
	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi
	leaq 32(%rdi),%rdi
	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	jmp open_avx2_tail_loop
open_avx2_tail:
	cmpq $16,%rbx
	vmovdqa %xmm0,%xmm1
	jb 1f
	subq $16,%rbx

	vpxor (%rsi),%xmm0,%xmm1
	vmovdqu %xmm1,(%rdi)
	leaq 16(%rsi),%rsi
	leaq 16(%rdi),%rdi
	vperm2i128 $0x11,%ymm0,%ymm0,%ymm0
	vmovdqa %xmm0,%xmm1
1:
	vzeroupper
	jmp open_sse_tail_16

open_avx2_192:
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm8,%ymm10
	vpaddd .avx2_inc(%rip),%ymm12,%ymm13
	vmovdqa %ymm12,%ymm11
	vmovdqa %ymm13,%ymm15
	movq $10,%r10
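// open_avx2_192 runs ten ChaCha20 double rounds over the two block pairs
// prepared above, enough for short inputs plus the Poly1305 key block.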
1:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5

	decq %r10
	jne 1b
	vpaddd %ymm2,%ymm0,%ymm0
	vpaddd %ymm2,%ymm1,%ymm1
	vpaddd %ymm6,%ymm4,%ymm4
	vpaddd %ymm6,%ymm5,%ymm5
	vpaddd %ymm10,%ymm8,%ymm8
	vpaddd %ymm10,%ymm9,%ymm9
	vpaddd %ymm11,%ymm12,%ymm12
	vpaddd %ymm15,%ymm13,%ymm13
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

	vpand .clamp(%rip),%ymm3,%ymm3
	vmovdqa %ymm3,0(%rbp)

	vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
open_avx2_short:
	movq %r8,%r8
	call poly_hash_ad_internal
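// Short-open loop: hash 32 bytes of ciphertext, then XOR the same bytes
// with keystream, rotating the queued keystream registers into ymm0.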
open_avx2_hash_and_xor_loop:
	cmpq $32,%rbx
	jb open_avx2_short_tail_32
	subq $32,%rbx
	addq 0(%rsi),%r10
	adcq 8+0(%rsi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 16(%rsi),%r10
	adcq 8+16(%rsi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12


	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi
	leaq 32(%rdi),%rdi

	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	vmovdqa %ymm1,%ymm12
	vmovdqa %ymm5,%ymm1
	vmovdqa %ymm9,%ymm5
	vmovdqa %ymm13,%ymm9
	vmovdqa %ymm2,%ymm13
	vmovdqa %ymm6,%ymm2
	jmp open_avx2_hash_and_xor_loop
open_avx2_short_tail_32:
	cmpq $16,%rbx
	vmovdqa %xmm0,%xmm1
	jb 1f
	subq $16,%rbx
	addq 0(%rsi),%r10
	adcq 8+0(%rsi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	vpxor (%rsi),%xmm0,%xmm3
	vmovdqu %xmm3,(%rdi)
	leaq 16(%rsi),%rsi
	leaq 16(%rdi),%rdi
	vextracti128 $1,%ymm0,%xmm1
1:
	vzeroupper
	jmp open_sse_tail_16

open_avx2_320:
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm8,%ymm10
	vpaddd .avx2_inc(%rip),%ymm12,%ymm13
	vpaddd .avx2_inc(%rip),%ymm13,%ymm14
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm14,224(%rbp)
	movq $10,%r10
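// open_avx2_320 prepares three block pairs (320 bytes of keystream) for
// mid-sized inputs; the counters were saved to the frame above.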
1:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm6,%ymm6,%ymm6

	decq %r10
	jne 1b
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd %ymm7,%ymm4,%ymm4
	vpaddd %ymm7,%ymm5,%ymm5
	vpaddd %ymm7,%ymm6,%ymm6
	vpaddd %ymm11,%ymm8,%ymm8
	vpaddd %ymm11,%ymm9,%ymm9
	vpaddd %ymm11,%ymm10,%ymm10
	vpaddd 160(%rbp),%ymm12,%ymm12
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd 224(%rbp),%ymm14,%ymm14
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

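// The first 32 bytes of keystream become the Poly1305 key: clamp r and
// store r||s at 0(%rbp).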
	vpand .clamp(%rip),%ymm3,%ymm3
	vmovdqa %ymm3,0(%rbp)

	vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
	jmp open_avx2_short
.size chacha20_poly1305_open_avx2, .-chacha20_poly1305_open_avx2


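// Seal (encrypt-then-MAC) entry point. The key at (%r9) is broadcast into
// ymm4/ymm8/ymm12, the counter is seeded from .avx2_init, and the input
// length selects a specialised path, mirroring the open function above.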
.type chacha20_poly1305_seal_avx2,@function
.align 64
chacha20_poly1305_seal_avx2:
	vzeroupper
	vmovdqa .chacha20_consts(%rip),%ymm0
	vbroadcasti128 0(%r9),%ymm4
	vbroadcasti128 16(%r9),%ymm8
	vbroadcasti128 32(%r9),%ymm12
	vpaddd .avx2_init(%rip),%ymm12,%ymm12
	cmpq $192,%rbx
	jbe seal_avx2_192
	cmpq $320,%rbx
	jbe seal_avx2_320
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm4,64(%rbp)
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm8,%ymm11
	vmovdqa %ymm8,96(%rbp)
	vmovdqa %ymm12,%ymm15
	vpaddd .avx2_inc(%rip),%ymm15,%ymm14
	vpaddd .avx2_inc(%rip),%ymm14,%ymm13
	vpaddd .avx2_inc(%rip),%ymm13,%ymm12
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm14,224(%rbp)
	vmovdqa %ymm15,256(%rbp)
	movq $10,%r10
1:
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $12,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $4,%ymm15,%ymm15,%ymm15
	vpalignr $12,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm12,%ymm12,%ymm12

	decq %r10
	jnz 1b
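// Rounds complete: add the saved initial state back into all four block
// pairs to finalise the keystream.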
	vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd 64(%rbp),%ymm7,%ymm7
	vpaddd 96(%rbp),%ymm11,%ymm11
	vpaddd 256(%rbp),%ymm15,%ymm15
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12

	vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
	vperm2i128 $0x02,%ymm3,%ymm7,%ymm15
	vperm2i128 $0x13,%ymm3,%ymm7,%ymm3
	vpand .clamp(%rip),%ymm15,%ymm15
	vmovdqa %ymm15,0(%rbp)
	movq %r8,%r8
	call poly_hash_ad_internal
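// AD hashed; now encrypt the first 320 bytes of plaintext with the
// keystream computed above, then keep going chunk by chunk.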

	vpxor 0(%rsi),%ymm3,%ymm3
	vpxor 32(%rsi),%ymm11,%ymm11
	vmovdqu %ymm3,0(%rdi)
	vmovdqu %ymm11,32(%rdi)
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm15
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+64(%rsi),%ymm15,%ymm15
	vpxor 32+64(%rsi),%ymm2,%ymm2
	vpxor 64+64(%rsi),%ymm6,%ymm6
	vpxor 96+64(%rsi),%ymm10,%ymm10
	vmovdqu %ymm15,0+64(%rdi)
	vmovdqu %ymm2,32+64(%rdi)
	vmovdqu %ymm6,64+64(%rdi)
	vmovdqu %ymm10,96+64(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm15
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+192(%rsi),%ymm15,%ymm15
	vpxor 32+192(%rsi),%ymm1,%ymm1
	vpxor 64+192(%rsi),%ymm5,%ymm5
	vpxor 96+192(%rsi),%ymm9,%ymm9
	vmovdqu %ymm15,0+192(%rdi)
	vmovdqu %ymm1,32+192(%rdi)
	vmovdqu %ymm5,64+192(%rdi)
	vmovdqu %ymm9,96+192(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm15
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm15,%ymm8

	leaq 320(%rsi),%rsi
	subq $320,%rbx
	movq $320,%rcx
	cmpq $128,%rbx
	jbe seal_avx2_hash
	vpxor 0(%rsi),%ymm0,%ymm0
	vpxor 32(%rsi),%ymm4,%ymm4
	vpxor 64(%rsi),%ymm8,%ymm8
	vpxor 96(%rsi),%ymm12,%ymm12
	vmovdqu %ymm0,320(%rdi)
	vmovdqu %ymm4,352(%rdi)
	vmovdqu %ymm8,384(%rdi)
	vmovdqu %ymm12,416(%rdi)
	leaq 128(%rsi),%rsi
	subq $128,%rbx
	movq $8,%rcx
	movq $2,%r8
	cmpq $128,%rbx
	jbe seal_avx2_tail_128
	cmpq $256,%rbx
	jbe seal_avx2_tail_256
	cmpq $384,%rbx
	jbe seal_avx2_tail_384
	cmpq $512,%rbx
	jbe seal_avx2_tail_512
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,256(%rbp)
	vmovdqa %ymm14,224(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0

	subq $16,%rdi
	movq $9,%rcx
	jmp 4f
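// The first pass enters the main loop at 4: with nine iterations left,
// since no ciphertext exists to hash yet; later passes run all ten from 2:.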
1:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,256(%rbp)
	vmovdqa %ymm14,224(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm12,160(%rbp)

	movq $10,%rcx
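// Main seal loop: each iteration interleaves one ChaCha20 double round over
// four block pairs with Poly1305 absorption of ciphertext written on the
// previous 512-byte pass (three 16-byte blocks per iteration).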
2:
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	addq %rax,%r15
	adcq %rdx,%r9
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

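// 4: first-pass entry point; the Poly1305 absorption above is skipped while
// there is no ciphertext to hash yet.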
4:
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
	vpalignr $4,%ymm6,%ymm6,%ymm6
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	addq %rax,%r15
	adcq %rdx,%r9
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	addq 32(%rdi),%r10
	adcq 8+32(%rdi),%r11
	adcq $1,%r12

	leaq 48(%rdi),%rdi
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	addq %rax,%r15
	adcq %rdx,%r9
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $12,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $4,%ymm15,%ymm15,%ymm15
	vpalignr $12,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $12,%ymm5,%ymm5,%ymm5
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm12,%ymm12,%ymm12

	decq %rcx
	jne 2b
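// 512 bytes of keystream are ready: add back the input state, then store
// the encrypted chunk in 128-byte groups, hashing the older output as we go.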
	vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd 64(%rbp),%ymm7,%ymm7
	vpaddd 96(%rbp),%ymm11,%ymm11
	vpaddd 256(%rbp),%ymm15,%ymm15
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12

	leaq 32(%rdi),%rdi
	vmovdqa %ymm0,128(%rbp)
	addq -32(%rdi),%r10
	adcq 8+-32(%rdi),%r11
	adcq $1,%r12
	vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
	vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
	vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
	vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
	vpxor 0+0(%rsi),%ymm0,%ymm0
	vpxor 32+0(%rsi),%ymm3,%ymm3
	vpxor 64+0(%rsi),%ymm7,%ymm7
	vpxor 96+0(%rsi),%ymm11,%ymm11
	vmovdqu %ymm0,0+0(%rdi)
	vmovdqu %ymm3,32+0(%rdi)
	vmovdqu %ymm7,64+0(%rdi)
	vmovdqu %ymm11,96+0(%rdi)

	vmovdqa 128(%rbp),%ymm0
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm2,%ymm2
	vpxor 64+128(%rsi),%ymm6,%ymm6
	vpxor 96+128(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm2,32+128(%rdi)
	vmovdqu %ymm6,64+128(%rdi)
	vmovdqu %ymm10,96+128(%rdi)
	addq -16(%rdi),%r10
	adcq 8+-16(%rdi),%r11
	adcq $1,%r12
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+256(%rsi),%ymm3,%ymm3
	vpxor 32+256(%rsi),%ymm1,%ymm1
	vpxor 64+256(%rsi),%ymm5,%ymm5
	vpxor 96+256(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+256(%rdi)
	vmovdqu %ymm1,32+256(%rdi)
	vmovdqu %ymm5,64+256(%rdi)
	vmovdqu %ymm9,96+256(%rdi)
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm4
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm8
	vpxor 0+384(%rsi),%ymm3,%ymm3
	vpxor 32+384(%rsi),%ymm0,%ymm0
	vpxor 64+384(%rsi),%ymm4,%ymm4
	vpxor 96+384(%rsi),%ymm8,%ymm8
	vmovdqu %ymm3,0+384(%rdi)
	vmovdqu %ymm0,32+384(%rdi)
	vmovdqu %ymm4,64+384(%rdi)
	vmovdqu %ymm8,96+384(%rdi)

	leaq 512(%rsi),%rsi
	subq $512,%rbx
	cmpq $512,%rbx
	jg 1b
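// Fewer than 512 bytes remain: absorb the last two ciphertext blocks of the
// previous chunk, then pick a tail routine by the remaining length.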
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi
	movq $10,%rcx
	xorq %r8,%r8
	cmpq $128,%rbx
	ja 3f

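// Tail routines generate only as much extra keystream as the remainder
// needs; %rcx and %r8 sequence the interleaved hash iterations.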
seal_avx2_tail_128:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm12
	vmovdqa %ymm12,160(%rbp)

1:
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
2:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi
	decq %rcx
	jg 1b
	decq %r8
	jge 2b
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	jmp seal_avx2_short_loop
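// More than 128 tail bytes: try the 256-byte variant next, falling through
// to the 384- and 512-byte versions as needed.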
seal_avx2_tail_256:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm13,192(%rbp)

1:
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
2:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi
	decq %rcx
	jg 1b
	decq %r8
	jge 2b
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+0(%rsi),%ymm3,%ymm3
	vpxor 32+0(%rsi),%ymm1,%ymm1
	vpxor 64+0(%rsi),%ymm5,%ymm5
	vpxor 96+0(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+0(%rdi)
	vmovdqu %ymm1,32+0(%rdi)
	vmovdqu %ymm5,64+0(%rdi)
	vmovdqu %ymm9,96+0(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	movq $128,%rcx
	leaq 128(%rsi),%rsi
	subq $128,%rbx
	jmp seal_avx2_hash
3:
	cmpq $384,%rbx
	ja seal_avx2_tail_512

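// seal_avx2_tail_384: three two-block groups for 257..384 remaining bytes,
// with counters at 160(%rbp), 192(%rbp) and 224(%rbp).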
seal_avx2_tail_384:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm14,224(%rbp)

1:
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
2:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm6,%ymm6,%ymm6

	leaq 32(%rdi),%rdi
	decq %rcx
	jg 1b
	decq %r8
	jge 2b
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+0(%rsi),%ymm3,%ymm3
	vpxor 32+0(%rsi),%ymm2,%ymm2
	vpxor 64+0(%rsi),%ymm6,%ymm6
	vpxor 96+0(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+0(%rdi)
	vmovdqu %ymm2,32+0(%rdi)
	vmovdqu %ymm6,64+0(%rdi)
	vmovdqu %ymm10,96+0(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm1,%ymm1
	vpxor 64+128(%rsi),%ymm5,%ymm5
	vpxor 96+128(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm1,32+128(%rdi)
	vmovdqu %ymm5,64+128(%rdi)
	vmovdqu %ymm9,96+128(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	movq $256,%rcx
	leaq 256(%rsi),%rsi
	subq $256,%rbx
	jmp seal_avx2_hash

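// seal_avx2_tail_512: four two-block groups, the full sixteen-register state,
// for 385..512 remaining bytes. This path also switches the Poly1305
// arithmetic from mulq to the two-operand BMI2 mulxq form used below.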
seal_avx2_tail_512:
	vmovdqa .chacha20_consts(%rip),%ymm0
	vmovdqa 64(%rbp),%ymm4
	vmovdqa 96(%rbp),%ymm8
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm10
	vmovdqa %ymm0,%ymm3
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa .avx2_inc(%rip),%ymm12
	vpaddd 160(%rbp),%ymm12,%ymm15
	vpaddd %ymm15,%ymm12,%ymm14
	vpaddd %ymm14,%ymm12,%ymm13
	vpaddd %ymm13,%ymm12,%ymm12
	vmovdqa %ymm15,256(%rbp)
	vmovdqa %ymm14,224(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm12,160(%rbp)

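// Poly1305 block update, BMI2 variant. For each 16-byte ciphertext block m:
//   h = (h + m + 2^128) * r  mod  2^130 - 5
// with h kept in %r10,%r11,%r12 and the clamped key r at 0(%rbp),8(%rbp).
// mulxq forms the 64x64->128 partial products without disturbing the carry
// flag. The reduction keeps h mod 2^130 (andq $3) and folds the overflow c
// back in as 5*c: once as 4*c (the andq $-4 limbs) and once as c (the
// shrdq/shrq by 2 limbs).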
1:
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	addq %rax,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
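// Round loop: the four ChaCha20 groups advance in lockstep, interleaved with
// two Poly1305 block updates per double-round. All sixteen %ymm registers are
// live here, so %ymm8 bounces through the spill slot at 128(%rbp) whenever a
// register is needed for the .rol16/.rol8 shuffle masks or rotate scratch.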
2:
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $4,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $12,%ymm15,%ymm15,%ymm15
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	addq %rax,%r15
	adcq %rdx,%r9
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vmovdqa %ymm8,128(%rbp)
	vmovdqa .rol16(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $20,%ymm7,%ymm8
	vpslld $32-20,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $20,%ymm6,%ymm8
	vpslld $32-20,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $20,%ymm5,%ymm8
	vpslld $32-20,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $20,%ymm4,%ymm8
	vpslld $32-20,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	vmovdqa .rol8(%rip),%ymm8
	vpaddd %ymm7,%ymm3,%ymm3
	vpaddd %ymm6,%ymm2,%ymm2
	vpaddd %ymm5,%ymm1,%ymm1
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm3,%ymm15,%ymm15
	vpxor %ymm2,%ymm14,%ymm14
	vpxor %ymm1,%ymm13,%ymm13
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb %ymm8,%ymm15,%ymm15
	vpshufb %ymm8,%ymm14,%ymm14
	vpshufb %ymm8,%ymm13,%ymm13
	vpshufb %ymm8,%ymm12,%ymm12
	vmovdqa 128(%rbp),%ymm8
	vpaddd %ymm15,%ymm11,%ymm11
	vpaddd %ymm14,%ymm10,%ymm10
	vpaddd %ymm13,%ymm9,%ymm9
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm11,%ymm7,%ymm7
	vpxor %ymm10,%ymm6,%ymm6
	movq 0+0(%rbp),%rdx
	movq %rdx,%r15
	mulxq %r10,%r13,%r14
	mulxq %r11,%rax,%rdx
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	vpxor %ymm9,%ymm5,%ymm5
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa %ymm8,128(%rbp)
	vpsrld $25,%ymm7,%ymm8
	vpslld $32-25,%ymm7,%ymm7
	vpxor %ymm8,%ymm7,%ymm7
	vpsrld $25,%ymm6,%ymm8
	vpslld $32-25,%ymm6,%ymm6
	vpxor %ymm8,%ymm6,%ymm6
	vpsrld $25,%ymm5,%ymm8
	vpslld $32-25,%ymm5,%ymm5
	vpxor %ymm8,%ymm5,%ymm5
	vpsrld $25,%ymm4,%ymm8
	vpslld $32-25,%ymm4,%ymm4
	vpxor %ymm8,%ymm4,%ymm4
	vmovdqa 128(%rbp),%ymm8
	vpalignr $12,%ymm7,%ymm7,%ymm7
	vpalignr $8,%ymm11,%ymm11,%ymm11
	vpalignr $4,%ymm15,%ymm15,%ymm15
	vpalignr $12,%ymm6,%ymm6,%ymm6
	movq 8+0(%rbp),%rdx
	mulxq %r10,%r10,%rax
	addq %r10,%r14
	mulxq %r11,%r11,%r9
	adcq %r11,%r15
	adcq $0,%r9
	imulq %r12,%rdx
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm12,%ymm12,%ymm12

	addq %rax,%r15
	adcq %rdx,%r9

	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi
	decq %rcx
	jg 1b
	decq %r8
	jge 2b
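// Rounds done: add the saved initial states back in, then gather each block's
// rows with vperm2i128 ($0x02 pairs the low 128-bit lanes of two registers,
// $0x13 the high ones) so the keystream becomes contiguous, XOR in the
// plaintext and store the ciphertext. The final group stays in registers for
// seal_avx2_hash and the short-input loop.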
	vpaddd .chacha20_consts(%rip),%ymm3,%ymm3
	vpaddd 64(%rbp),%ymm7,%ymm7
	vpaddd 96(%rbp),%ymm11,%ymm11
	vpaddd 256(%rbp),%ymm15,%ymm15
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd 64(%rbp),%ymm6,%ymm6
	vpaddd 96(%rbp),%ymm10,%ymm10
	vpaddd 224(%rbp),%ymm14,%ymm14
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd 64(%rbp),%ymm5,%ymm5
	vpaddd 96(%rbp),%ymm9,%ymm9
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd 64(%rbp),%ymm4,%ymm4
	vpaddd 96(%rbp),%ymm8,%ymm8
	vpaddd 160(%rbp),%ymm12,%ymm12

	vmovdqa %ymm0,128(%rbp)
	vperm2i128 $0x02,%ymm3,%ymm7,%ymm0
	vperm2i128 $0x13,%ymm3,%ymm7,%ymm7
	vperm2i128 $0x02,%ymm11,%ymm15,%ymm3
	vperm2i128 $0x13,%ymm11,%ymm15,%ymm11
	vpxor 0+0(%rsi),%ymm0,%ymm0
	vpxor 32+0(%rsi),%ymm3,%ymm3
	vpxor 64+0(%rsi),%ymm7,%ymm7
	vpxor 96+0(%rsi),%ymm11,%ymm11
	vmovdqu %ymm0,0+0(%rdi)
	vmovdqu %ymm3,32+0(%rdi)
	vmovdqu %ymm7,64+0(%rdi)
	vmovdqu %ymm11,96+0(%rdi)

	vmovdqa 128(%rbp),%ymm0
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm3
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm6
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm10
	vpxor 0+128(%rsi),%ymm3,%ymm3
	vpxor 32+128(%rsi),%ymm2,%ymm2
	vpxor 64+128(%rsi),%ymm6,%ymm6
	vpxor 96+128(%rsi),%ymm10,%ymm10
	vmovdqu %ymm3,0+128(%rdi)
	vmovdqu %ymm2,32+128(%rdi)
	vmovdqu %ymm6,64+128(%rdi)
	vmovdqu %ymm10,96+128(%rdi)
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm3
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm5
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm9
	vpxor 0+256(%rsi),%ymm3,%ymm3
	vpxor 32+256(%rsi),%ymm1,%ymm1
	vpxor 64+256(%rsi),%ymm5,%ymm5
	vpxor 96+256(%rsi),%ymm9,%ymm9
	vmovdqu %ymm3,0+256(%rdi)
	vmovdqu %ymm1,32+256(%rdi)
	vmovdqu %ymm5,64+256(%rdi)
	vmovdqu %ymm9,96+256(%rdi)
	vperm2i128 $0x13,%ymm0,%ymm4,%ymm3
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x02,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm12
	vmovdqa %ymm3,%ymm8

	movq $384,%rcx
	leaq 384(%rsi),%rsi
	subq $384,%rbx
	jmp seal_avx2_hash

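// seal_avx2_320: key-setup path for sealing at most 320 bytes. Three
// two-block groups yield six ChaCha20 blocks; block 0 supplies the Poly1305
// key below and the rest is queued as keystream for seal_avx2_short.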
seal_avx2_320:
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm8,%ymm10
	vpaddd .avx2_inc(%rip),%ymm12,%ymm13
	vpaddd .avx2_inc(%rip),%ymm13,%ymm14
	vmovdqa %ymm4,%ymm7
	vmovdqa %ymm8,%ymm11
	vmovdqa %ymm12,160(%rbp)
	vmovdqa %ymm13,192(%rbp)
	vmovdqa %ymm14,224(%rbp)
	movq $10,%r10
1:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $12,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $4,%ymm6,%ymm6,%ymm6
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol16(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpsrld $20,%ymm6,%ymm3
	vpslld $12,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpaddd %ymm6,%ymm2,%ymm2
	vpxor %ymm2,%ymm14,%ymm14
	vpshufb .rol8(%rip),%ymm14,%ymm14
	vpaddd %ymm14,%ymm10,%ymm10
	vpxor %ymm10,%ymm6,%ymm6
	vpslld $7,%ymm6,%ymm3
	vpsrld $25,%ymm6,%ymm6
	vpxor %ymm3,%ymm6,%ymm6
	vpalignr $4,%ymm14,%ymm14,%ymm14
	vpalignr $8,%ymm10,%ymm10,%ymm10
	vpalignr $12,%ymm6,%ymm6,%ymm6

	decq %r10
	jne 1b
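// The first 32 keystream bytes of block 0 become the Poly1305 key: .clamp
// masks the r half as Poly1305 requires and passes the s half through its
// all-ones lanes; the pair is stored at 0(%rbp). The remaining permutes order
// the leftover keystream into the register queue consumed below.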
	vpaddd .chacha20_consts(%rip),%ymm0,%ymm0
	vpaddd .chacha20_consts(%rip),%ymm1,%ymm1
	vpaddd .chacha20_consts(%rip),%ymm2,%ymm2
	vpaddd %ymm7,%ymm4,%ymm4
	vpaddd %ymm7,%ymm5,%ymm5
	vpaddd %ymm7,%ymm6,%ymm6
	vpaddd %ymm11,%ymm8,%ymm8
	vpaddd %ymm11,%ymm9,%ymm9
	vpaddd %ymm11,%ymm10,%ymm10
	vpaddd 160(%rbp),%ymm12,%ymm12
	vpaddd 192(%rbp),%ymm13,%ymm13
	vpaddd 224(%rbp),%ymm14,%ymm14
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

	vpand .clamp(%rip),%ymm3,%ymm3
	vmovdqa %ymm3,0(%rbp)

	vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
	vperm2i128 $0x02,%ymm2,%ymm6,%ymm9
	vperm2i128 $0x02,%ymm10,%ymm14,%ymm13
	vperm2i128 $0x13,%ymm2,%ymm6,%ymm2
	vperm2i128 $0x13,%ymm10,%ymm14,%ymm6
	jmp seal_avx2_short

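// seal_avx2_192: the analogous key-setup path for sealing at most 192 bytes,
// using two two-block groups (four ChaCha20 blocks).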
seal_avx2_192:
	vmovdqa %ymm0,%ymm1
	vmovdqa %ymm0,%ymm2
	vmovdqa %ymm4,%ymm5
	vmovdqa %ymm4,%ymm6
	vmovdqa %ymm8,%ymm9
	vmovdqa %ymm8,%ymm10
	vpaddd .avx2_inc(%rip),%ymm12,%ymm13
	vmovdqa %ymm12,%ymm11
	vmovdqa %ymm13,%ymm15
	movq $10,%r10
1:
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $12,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $4,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $12,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $4,%ymm5,%ymm5,%ymm5
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol16(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpsrld $20,%ymm4,%ymm3
	vpslld $12,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpaddd %ymm4,%ymm0,%ymm0
	vpxor %ymm0,%ymm12,%ymm12
	vpshufb .rol8(%rip),%ymm12,%ymm12
	vpaddd %ymm12,%ymm8,%ymm8
	vpxor %ymm8,%ymm4,%ymm4
	vpslld $7,%ymm4,%ymm3
	vpsrld $25,%ymm4,%ymm4
	vpxor %ymm3,%ymm4,%ymm4
	vpalignr $4,%ymm12,%ymm12,%ymm12
	vpalignr $8,%ymm8,%ymm8,%ymm8
	vpalignr $12,%ymm4,%ymm4,%ymm4
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol16(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpsrld $20,%ymm5,%ymm3
	vpslld $12,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpaddd %ymm5,%ymm1,%ymm1
	vpxor %ymm1,%ymm13,%ymm13
	vpshufb .rol8(%rip),%ymm13,%ymm13
	vpaddd %ymm13,%ymm9,%ymm9
	vpxor %ymm9,%ymm5,%ymm5
	vpslld $7,%ymm5,%ymm3
	vpsrld $25,%ymm5,%ymm5
	vpxor %ymm3,%ymm5,%ymm5
	vpalignr $4,%ymm13,%ymm13,%ymm13
	vpalignr $8,%ymm9,%ymm9,%ymm9
	vpalignr $12,%ymm5,%ymm5,%ymm5

	decq %r10
	jne 1b
	vpaddd %ymm2,%ymm0,%ymm0
	vpaddd %ymm2,%ymm1,%ymm1
	vpaddd %ymm6,%ymm4,%ymm4
	vpaddd %ymm6,%ymm5,%ymm5
	vpaddd %ymm10,%ymm8,%ymm8
	vpaddd %ymm10,%ymm9,%ymm9
	vpaddd %ymm11,%ymm12,%ymm12
	vpaddd %ymm15,%ymm13,%ymm13
	vperm2i128 $0x02,%ymm0,%ymm4,%ymm3

	vpand .clamp(%rip),%ymm3,%ymm3
	vmovdqa %ymm3,0(%rbp)

	vperm2i128 $0x13,%ymm0,%ymm4,%ymm0
	vperm2i128 $0x13,%ymm8,%ymm12,%ymm4
	vperm2i128 $0x02,%ymm1,%ymm5,%ymm8
	vperm2i128 $0x02,%ymm9,%ymm13,%ymm12
	vperm2i128 $0x13,%ymm1,%ymm5,%ymm1
	vperm2i128 $0x13,%ymm9,%ymm13,%ymm5
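// seal_avx2_short: hash the additional data, then finish the message from the
// queued keystream. The movq %r8,%r8 below is a no-op, seemingly left behind
// by the code generator (the AD length is already in %r8). seal_avx2_hash
// absorbs %rcx bytes of ciphertext already written at %rdi, one 16-byte
// Poly1305 block per iteration.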
seal_avx2_short:
	movq %r8,%r8
	call poly_hash_ad_internal
	xorq %rcx,%rcx
seal_avx2_hash:
	cmpq $16,%rcx
	jb seal_avx2_short_loop
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	subq $16,%rcx
	addq $16,%rdi
	jmp seal_avx2_hash
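// seal_avx2_short_loop: while at least 32 bytes remain, XOR one %ymm of
// queued keystream into the plaintext, hash the two ciphertext words just
// written, and rotate the keystream queue down (%ymm4 -> %ymm0, %ymm8 ->
// %ymm4, ...). seal_avx2_short_tail then covers one last full 16-byte block
// from the low xmm lane; vextracti128 exposes the high lane and
// seal_sse_tail_16 finishes any sub-16-byte remainder after vzeroupper.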
seal_avx2_short_loop:
	cmpq $32,%rbx
	jb seal_avx2_short_tail
	subq $32,%rbx

	vpxor (%rsi),%ymm0,%ymm0
	vmovdqu %ymm0,(%rdi)
	leaq 32(%rsi),%rsi

	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12
	addq 16(%rdi),%r10
	adcq 8+16(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 32(%rdi),%rdi

	vmovdqa %ymm4,%ymm0
	vmovdqa %ymm8,%ymm4
	vmovdqa %ymm12,%ymm8
	vmovdqa %ymm1,%ymm12
	vmovdqa %ymm5,%ymm1
	vmovdqa %ymm9,%ymm5
	vmovdqa %ymm13,%ymm9
	vmovdqa %ymm2,%ymm13
	vmovdqa %ymm6,%ymm2
	jmp seal_avx2_short_loop
seal_avx2_short_tail:
	cmpq $16,%rbx
	jb 1f
	subq $16,%rbx
	vpxor (%rsi),%xmm0,%xmm3
	vmovdqu %xmm3,(%rdi)
	leaq 16(%rsi),%rsi
	addq 0(%rdi),%r10
	adcq 8+0(%rdi),%r11
	adcq $1,%r12
	movq 0+0(%rbp),%rax
	movq %rax,%r15
	mulq %r10
	movq %rax,%r13
	movq %rdx,%r14
	movq 0+0(%rbp),%rax
	mulq %r11
	imulq %r12,%r15
	addq %rax,%r14
	adcq %rdx,%r15
	movq 8+0(%rbp),%rax
	movq %rax,%r9
	mulq %r10
	addq %rax,%r14
	adcq $0,%rdx
	movq %rdx,%r10
	movq 8+0(%rbp),%rax
	mulq %r11
	addq %rax,%r15
	adcq $0,%rdx
	imulq %r12,%r9
	addq %r10,%r15
	adcq %rdx,%r9
	movq %r13,%r10
	movq %r14,%r11
	movq %r15,%r12
	andq $3,%r12
	movq %r15,%r13
	andq $-4,%r13
	movq %r9,%r14
	shrdq $2,%r9,%r15
	shrq $2,%r9
	addq %r13,%r10
	adcq %r14,%r11
	adcq $0,%r12
	addq %r15,%r10
	adcq %r9,%r11
	adcq $0,%r12

	leaq 16(%rdi),%rdi
	vextracti128 $1,%ymm0,%xmm0
1:
	vzeroupper
	jmp seal_sse_tail_16
.cfi_endproc
#endif