/*
 * ChaCha20 keystream XOR for 32-bit x86 (GNU as, AT&T syntax, ELF).
 *
 * Both entry points read five 32-bit stack arguments. As used by the code:
 *   arg1  destination buffer (written)
 *   arg2  source buffer (read, XORed with the keystream)
 *   arg3  length in bytes
 *   arg4  pointer to eight 32-bit key words
 *   arg5  pointer to four 32-bit counter/nonce words; word 0 is the block
 *         counter and is advanced by one per 64-byte block
 * NOTE(review): presumably the C prototype is
 *   void ChaCha20_ctr32(uint8_t *out, const uint8_t *in, size_t len,
 *                       const uint32_t key[8], const uint32_t counter[4]);
 * confirm against the declaring header.
 *
 * %ebp, %ebx, %esi and %edi are saved and restored; %eax, %ecx, %edx and
 * %xmm0-%xmm7 are clobbered.  Nothing is returned.
 *
 * The instruction order inside the round loops is hand-scheduled; do not
 * reorder it.
 */
#if defined(__i386__)
.file	"chacha-x86.S"
.text

/*
 * ChaCha20_ctr32: dispatcher plus scalar (integer-register) implementation.
 * Returns immediately when the length is zero.  Jumps into the SSSE3 code
 * when both capability bits tested below are set in OPENSSL_ia32cap_P.
 */
.globl	ChaCha20_ctr32
.hidden	ChaCha20_ctr32
.type	ChaCha20_ctr32,@function
.p2align	4
ChaCha20_ctr32:
.L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* After four pushes and the return address, argN is at 16+4*N(%esp). */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax			/* len == 0 ? */
	je	.L000no_data
	/* PIC: obtain the run-time address of .Lpic_point in %eax. */
	call	.Lpic_point
.Lpic_point:
	popl	%eax
	leal	OPENSSL_ia32cap_P-.Lpic_point(%eax),%ebp
	/*
	 * Bit 24 of capability word 0 and bit 9 of word 1 must both be set to
	 * take the SIMD path (presumably FXSR and SSSE3; see the
	 * OPENSSL_ia32cap documentation to confirm).
	 */
	testl	$0x1000000,(%ebp)
	jz	.L001x86
	testl	$0x200,4(%ebp)
	jz	.L001x86
	/* %eax still holds the address of .Lpic_point; the target relies on it. */
	jmp	.Lssse3_shortcut

	/*
	 * Scalar path.  Frame after "subl $132,%esp":
	 *     0.. 60(%esp)  working state x0..x15 (word i at 4*i) and, in the
	 *                   tail, the 64-byte keystream block
	 *    80..108(%esp)  copy of the eight key words
	 *   112..124(%esp)  copy of the four counter/nonce words
	 *   128(%esp)       spilled round counter
	 *   152/156/160(%esp) are the caller's arg1/arg2/arg3 slots (20+132 ...),
	 *                   reused to hold the advancing dst, src and remaining len
	 */
.L001x86:
	movl	32(%esp),%esi			/* key pointer */
	movl	36(%esp),%edi			/* counter pointer */
	subl	$132,%esp
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax				/* pre-decrement: .L002entry adds 1 before use */
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	.L002entry
.p2align	4
.L003outer_loop:
	/* Write back advanced src, dst and remaining length. */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
.L002entry:
	/* Seed the working state: constants, key, counter (+1). */
	movl	$0x61707865,%eax		/* x0 */
	movl	$0x3320646e,4(%esp)		/* x1 */
	movl	$0x79622d32,8(%esp)		/* x2 */
	movl	$0x6b206574,12(%esp)		/* x3 */
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx				/* block counter for this block */
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx			/* 10 double rounds */
	jmp	.L004loop
.p2align	4
	/*
	 * One iteration = one double round: four column quarter-rounds
	 * (0,4,8,12) (1,5,9,13) (2,6,10,14) (3,7,11,15) followed by four
	 * diagonal quarter-rounds (0,5,10,15) (1,6,11,12) (2,7,8,13) (3,4,9,14),
	 * with loads/stores of the next operands interleaved.
	 * On entry: %eax=x0 %ebp=x4 %ecx=x8 %edx=x12 %esi=x9, %ebx=round counter;
	 * the remaining words live in their 4*i(%esp) slots.
	 */
.L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)			/* spill round counter */
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx			/* reload round counter */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	.L004loop

	/* Add the input state back in; words still in registers first. */
	movl	160(%esp),%ebx			/* remaining length */
	addl	$0x61707865,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	.L005tail

	/* Full block: XOR 64 source bytes with the keystream, store to dst. */
	movl	156(%esp),%ebx			/* src */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)			/* park word 0; %eax becomes dst */
	movl	152(%esp),%eax			/* dst */
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$0x3320646e,%ebp
	addl	$0x79622d32,%ecx
	addl	$0x6b206574,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx			/* src += 64 (flags untouched) */
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp			/* parked word 0 */
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)
	leal	64(%eax),%eax			/* dst += 64 */
	subl	$64,%ecx
	jnz	.L003outer_loop
	jmp	.L006done

	/*
	 * Final partial block (%ebx = 1..63 bytes left): materialise the whole
	 * keystream block at 0..63(%esp), then XOR byte by byte.
	 */
.L005tail:
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$0x3320646e,%ebp
	addl	$0x79622d32,%ecx
	addl	$0x6b206574,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp			/* src */
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx			/* dst */
	movl	%esi,44(%esp)
	xorl	%esi,%esi			/* byte index */
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
.L007tail_loop:
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	.L007tail_loop
.L006done:
	addl	$132,%esp
.L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ctr32,.-.L_ChaCha20_ctr32_begin

/*
 * ChaCha20_ssse3: SIMD implementation, same five stack arguments.
 * Processes 256 bytes (four blocks) per iteration while at least 256 bytes
 * remain, then 64 bytes at a time, then a byte-wise tail.
 *
 * .Lssse3_shortcut expects %eax = run-time address of .Lpic_point.  The
 * dispatcher in ChaCha20_ctr32 provides that; the call/pop sequence below
 * establishes the same invariant when this symbol is called directly
 * (previously %eax was read uninitialised on that path).
 *
 * Frame (64-byte aligned %esp):
 *     0..255(%esp)  4-way working state, addressed as -128..112(%ebx);
 *                   0..63(%esp) doubles as the 1-block state/keystream
 *   256..511(%esp)  4-way broadcast input state, addressed as -128..112(%ebp)
 *   512(%esp)       caller's %esp
 *   516/520(%esp)   key pointer / counter pointer
 */
.globl	ChaCha20_ssse3
.hidden	ChaCha20_ssse3
.type	ChaCha20_ssse3,@function
.p2align	4
ChaCha20_ssse3:
.L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Direct entry only: make %eax = address of .Lpic_point. */
	call	.Lssse3_pic_point
.Lssse3_pic_point:
	popl	%eax
	leal	.Lpic_point-.Lssse3_pic_point(%eax),%eax
.Lssse3_shortcut:
	movl	20(%esp),%edi			/* dst */
	movl	24(%esp),%esi			/* src */
	movl	28(%esp),%ecx			/* len */
	movl	32(%esp),%edx			/* key pointer */
	movl	36(%esp),%ebx			/* counter pointer */
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)			/* saved %esp, restored at .L011done */
	leal	.Lssse3_data-.Lpic_point(%eax),%eax	/* %eax = constant table */
	movdqu	(%ebx),%xmm3			/* counter/nonce row */
	cmpl	$256,%ecx
	jb	.L0081x

	/* 4-way setup: broadcast every state word across a vector. */
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$0x55,%xmm3,%xmm1
	pshufd	$0xaa,%xmm3,%xmm2
	pshufd	$0xff,%xmm3,%xmm3
	paddd	48(%eax),%xmm0			/* per-lane counter offsets 0,1,2,3 */
	pshufd	$0,%xmm7,%xmm4
	pshufd	$0x55,%xmm7,%xmm5
	psubd	64(%eax),%xmm0			/* minus 4: the loop adds 4 before use */
	pshufd	$0xaa,%xmm7,%xmm6
	pshufd	$0xff,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7			/* the four constant words */
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$0x55,%xmm3,%xmm1
	pshufd	$0xaa,%xmm3,%xmm2
	pshufd	$0xff,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$0x55,%xmm7,%xmm5
	pshufd	$0xaa,%xmm7,%xmm6
	pshufd	$0xff,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	/* Bias src/dst by 128 so the -128/-64/0/64 displacements reach 4 blocks. */
	leal	128(%esi),%esi
	leal	128(%edi),%edi
	jmp	.L009outer_loop
.p2align	4
.L009outer_loop:
	/* Copy the input state into the working area; advance counters by 4. */
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4			/* counters += 4 */
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx			/* 10 double rounds */
	nop
.p2align	4
	/*
	 * One iteration = one double round on four blocks at once.
	 * Rotations by 16 and 8 use pshufb with the masks at 0(%eax)/16(%eax);
	 * rotations by 12 and 7 use shift/shift/or.
	 * On entry: %xmm0=x0 %xmm3=x4 %xmm4=x8 %xmm5=x9 %xmm6=x12; the other
	 * words are in the working area (word i at 16*i-128(%ebx)).
	 */
.L010loop:
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	.L010loop

	/* Flush the words still held in registers to the working area. */
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)

	/*
	 * Output, four state words at a time: add the input state, transpose
	 * the 4x4 dwords so each register holds 16 contiguous bytes of one
	 * block, XOR with src and store.  Blocks are 64 bytes apart, hence the
	 * -128/-64/0/64 displacements; src/dst step by 16 per group.
	 */
	/* words 0..3 */
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* words 4..7 */
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* words 8..11 */
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	/* words 12..15 */
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi			/* 3*16 + 208 = 256 bytes per iteration */
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	.L009outer_loop			/* another 256 bytes available */
	addl	$256,%ecx			/* undo the bias: bytes actually left */
	jz	.L011done

	/*
	 * Fewer than 256 bytes left: drop to the 1-block code.  Remove the
	 * 128-byte pointer bias and rebuild the counter row: word 0 is the
	 * current lane-0 counter + 4, words 1..3 come from the caller's block.
	 */
	movl	520(%esp),%ebx
	leal	-128(%esi),%esi
	movl	516(%esp),%edx
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2
	pand	112(%eax),%xmm3
	por	%xmm2,%xmm3

	/*
	 * 1-block path.  %xmm0..%xmm3 hold the four state rows, %xmm6/%xmm7
	 * the rotate-by-16 / rotate-by-8 shuffle masks; the input state is kept
	 * at 0..63(%esp).  %edx = key pointer, %xmm3 = counter row on entry.
	 */
.L0081x:
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6
	movdqa	16(%eax),%xmm7
	movl	%ebp,48(%esp)			/* overwritten by the %xmm3 store just below */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	.L012loop1x
.p2align	4
.L013outer1x:
	/* Next block: reload the input rows and add 1 to the counter. */
	movdqa	80(%eax),%xmm3
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	.L012loop1x
.p2align	4
	/*
	 * One iteration = one double round on row vectors.  The pshufd triples
	 * rotate rows 1..3 to line up the diagonals, then rotate them back.
	 */
.L012loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm6,%xmm3			/* rotate left 16 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm7,%xmm3			/* rotate left 8 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$0x4e,%xmm2,%xmm2
	pshufd	$0x39,%xmm1,%xmm1
	pshufd	$0x93,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm6,%xmm3			/* rotate left 16 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
	pshufb	%xmm7,%xmm3			/* rotate left 8 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	pshufd	$0x4e,%xmm2,%xmm2
	pshufd	$0x93,%xmm1,%xmm1
	pshufd	$0x39,%xmm3,%xmm3
	decl	%edx
	jnz	.L012loop1x

	/* Add the input state back in. */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	.L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	.L013outer1x
	jmp	.L011done

	/* Final 1..63 bytes: spill the keystream block and XOR byte by byte. */
.L014tail:
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp			/* byte index */
.L015tail_loop:
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	.L015tail_loop
.L011done:
	movl	512(%esp),%esp			/* restore the caller's stack pointer */
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
.size	ChaCha20_ssse3,.-.L_ChaCha20_ssse3_begin

/*
 * Constant table for the SIMD path (offsets relative to .Lssse3_data):
 *     0  pshufb mask: rotate each dword left by 16
 *    16  pshufb mask: rotate each dword left by 8
 *    32  the four constant state words
 *    48  per-lane counter offsets 0,1,2,3
 *    64  counter step for the 4-way loop
 *    80  counter step for the 1-block loop
 *    96  counter step applied when leaving the 4-way loop
 *   112  mask keeping words 1..3 of the caller's counter block
 */
.p2align	6
.Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	0x61707865,0x3320646e,0x79622d32,0x6b206574
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.p2align	6
/* NUL-terminated ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>" */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
#endif