#if defined(__i386__)
.file	"chacha-x86.S"
.text
/*
 * ChaCha20 for 32-bit x86, AT&T syntax, Mach-O (leading-underscore symbols,
 * non-lazy symbol pointer for OPENSSL_ia32cap_P).
 *
 * _ChaCha20_ctr32(out, in, len, key, counter)          cdecl, all args on stack
 *   After the four register pushes the arguments are found at:
 *     20(%esp) out      destination buffer
 *     24(%esp) in       source buffer
 *     28(%esp) len      byte count; 0 returns immediately
 *     32(%esp) key      32 bytes are read
 *     36(%esp) counter  16 bytes are read; word 0 is incremented per block
 *   Preserves %ebp, %ebx, %esi, %edi. Clobbers %eax, %ecx, %edx, flags
 *   (and %xmm0-%xmm7 when the SSSE3 path is taken).
 *
 * Dispatch: if bit 24 of OPENSSL_ia32cap_P[0] and bit 9 of
 * OPENSSL_ia32cap_P[1] are both set, control jumps to Lssse3_shortcut;
 * otherwise the integer-only code below is used.
 *
 * Integer-path frame after "subl $132,%esp":
 *     0..60(%esp)    working state x0..x15 (also the keystream block in the tail)
 *     80..108(%esp)  copy of the 8 key words
 *     112..124(%esp) copy of the 4 counter words
 *     128(%esp)      spilled round-loop counter
 *     152/156/160(%esp)  the caller's out / in / len argument slots, reused
 *                        as the running pointers and remaining length
 */
.globl	_ChaCha20_ctr32
.private_extern	_ChaCha20_ctr32
.align	4
_ChaCha20_ctr32:
L_ChaCha20_ctr32_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	xorl	%eax,%eax
	cmpl	28(%esp),%eax			/* len == 0 ? */
	je	L000no_data
	call	Lpic_point			/* call/pop: %eax = address of Lpic_point */
Lpic_point:
	popl	%eax
	movl	L_OPENSSL_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
	testl	$16777216,(%ebp)		/* capability word 0, bit 24 */
	jz	L001x86
	testl	$512,4(%ebp)			/* capability word 1, bit 9 */
	jz	L001x86
	jmp	Lssse3_shortcut			/* %eax still holds Lpic_point */
L001x86:
	movl	32(%esp),%esi			/* key */
	movl	36(%esp),%edi			/* counter */
	subl	$132,%esp
	/* Copy the key to 80..108(%esp). */
	movl	(%esi),%eax
	movl	4(%esi),%ebx
	movl	8(%esi),%ecx
	movl	12(%esi),%edx
	movl	%eax,80(%esp)
	movl	%ebx,84(%esp)
	movl	%ecx,88(%esp)
	movl	%edx,92(%esp)
	movl	16(%esi),%eax
	movl	20(%esi),%ebx
	movl	24(%esi),%ecx
	movl	28(%esi),%edx
	movl	%eax,96(%esp)
	movl	%ebx,100(%esp)
	movl	%ecx,104(%esp)
	movl	%edx,108(%esp)
	/* Copy the counter block to 112..124(%esp). */
	movl	(%edi),%eax
	movl	4(%edi),%ebx
	movl	8(%edi),%ecx
	movl	12(%edi),%edx
	subl	$1,%eax				/* pre-decrement: L002entry adds 1 per block */
	movl	%eax,112(%esp)
	movl	%ebx,116(%esp)
	movl	%ecx,120(%esp)
	movl	%edx,124(%esp)
	jmp	L002entry
.align	4,0x90
L003outer_loop:
	/* Save advanced in / out pointers and remaining length. */
	movl	%ebx,156(%esp)
	movl	%eax,152(%esp)
	movl	%ecx,160(%esp)
L002entry:
	/* Load the initial state: constants "expand 32-byte k", key, counter. */
	movl	$1634760805,%eax
	movl	$857760878,4(%esp)
	movl	$2036477234,8(%esp)
	movl	$1797285236,12(%esp)
	movl	84(%esp),%ebx
	movl	88(%esp),%ebp
	movl	104(%esp),%ecx
	movl	108(%esp),%esi
	movl	116(%esp),%edx
	movl	120(%esp),%edi
	movl	%ebx,20(%esp)
	movl	%ebp,24(%esp)
	movl	%ecx,40(%esp)
	movl	%esi,44(%esp)
	movl	%edx,52(%esp)
	movl	%edi,56(%esp)
	movl	92(%esp),%ebx
	movl	124(%esp),%edi
	movl	112(%esp),%edx
	movl	80(%esp),%ebp
	movl	96(%esp),%ecx
	movl	100(%esp),%esi
	addl	$1,%edx				/* block counter for this block */
	movl	%ebx,28(%esp)
	movl	%edi,60(%esp)
	movl	%edx,112(%esp)
	movl	$10,%ebx			/* 10 iterations, 8 quarter-rounds each */
	jmp	L004loop
.align	4,0x90
L004loop:
	addl	%ebp,%eax
	movl	%ebx,128(%esp)			/* spill loop counter; %ebx is needed as scratch */
	movl	%ebp,%ebx
	xorl	%eax,%edx
	roll	$16,%edx
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	52(%esp),%edi
	roll	$12,%ebx
	movl	20(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,48(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,32(%esp)
	roll	$16,%edi
	movl	%ebx,16(%esp)
	addl	%edi,%esi
	movl	40(%esp),%ecx
	xorl	%esi,%ebp
	movl	56(%esp),%edx
	roll	$12,%ebp
	movl	24(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,52(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,36(%esp)
	roll	$16,%edx
	movl	%ebp,20(%esp)
	addl	%edx,%ecx
	movl	44(%esp),%esi
	xorl	%ecx,%ebx
	movl	60(%esp),%edi
	roll	$12,%ebx
	movl	28(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,56(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,24(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	roll	$12,%ebp
	movl	20(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,%edx
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	roll	$16,%edx
	movl	%ebp,28(%esp)
	addl	%edx,%ecx
	xorl	%ecx,%ebx
	movl	48(%esp),%edi
	roll	$12,%ebx
	movl	24(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,(%esp)
	roll	$8,%edx
	movl	4(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,60(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	movl	%ecx,40(%esp)
	roll	$16,%edi
	movl	%ebx,20(%esp)
	addl	%edi,%esi
	movl	32(%esp),%ecx
	xorl	%esi,%ebp
	movl	52(%esp),%edx
	roll	$12,%ebp
	movl	28(%esp),%ebx
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,4(%esp)
	roll	$8,%edi
	movl	8(%esp),%eax
	addl	%edi,%esi
	movl	%edi,48(%esp)
	xorl	%esi,%ebp
	addl	%ebx,%eax
	roll	$7,%ebp
	xorl	%eax,%edx
	movl	%esi,44(%esp)
	roll	$16,%edx
	movl	%ebp,24(%esp)
	addl	%edx,%ecx
	movl	36(%esp),%esi
	xorl	%ecx,%ebx
	movl	56(%esp),%edi
	roll	$12,%ebx
	movl	16(%esp),%ebp
	addl	%ebx,%eax
	xorl	%eax,%edx
	movl	%eax,8(%esp)
	roll	$8,%edx
	movl	12(%esp),%eax
	addl	%edx,%ecx
	movl	%edx,52(%esp)
	xorl	%ecx,%ebx
	addl	%ebp,%eax
	roll	$7,%ebx
	xorl	%eax,%edi
	roll	$16,%edi
	movl	%ebx,28(%esp)
	addl	%edi,%esi
	xorl	%esi,%ebp
	movl	48(%esp),%edx
	roll	$12,%ebp
	movl	128(%esp),%ebx			/* reload loop counter */
	addl	%ebp,%eax
	xorl	%eax,%edi
	movl	%eax,12(%esp)
	roll	$8,%edi
	movl	(%esp),%eax
	addl	%edi,%esi
	movl	%edi,56(%esp)
	xorl	%esi,%ebp
	roll	$7,%ebp
	decl	%ebx
	jnz	L004loop
	/* Add the input state back in, then XOR with the input data. */
	movl	160(%esp),%ebx			/* remaining length */
	addl	$1634760805,%eax
	addl	80(%esp),%ebp
	addl	96(%esp),%ecx
	addl	100(%esp),%esi
	cmpl	$64,%ebx
	jb	L005tail			/* fewer than 64 bytes left */
	movl	156(%esp),%ebx			/* in */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	xorl	(%ebx),%eax
	xorl	16(%ebx),%ebp
	movl	%eax,(%esp)			/* park word 0; %eax is needed for out */
	movl	152(%esp),%eax			/* out */
	xorl	32(%ebx),%ecx
	xorl	36(%ebx),%esi
	xorl	48(%ebx),%edx
	xorl	56(%ebx),%edi
	movl	%ebp,16(%eax)
	movl	%ecx,32(%eax)
	movl	%esi,36(%eax)
	movl	%edx,48(%eax)
	movl	%edi,56(%eax)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	xorl	4(%ebx),%ebp
	xorl	8(%ebx),%ecx
	xorl	12(%ebx),%esi
	xorl	20(%ebx),%edx
	xorl	24(%ebx),%edi
	movl	%ebp,4(%eax)
	movl	%ecx,8(%eax)
	movl	%esi,12(%eax)
	movl	%edx,20(%eax)
	movl	%edi,24(%eax)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	xorl	28(%ebx),%ebp
	xorl	40(%ebx),%ecx
	xorl	44(%ebx),%esi
	xorl	52(%ebx),%edx
	xorl	60(%ebx),%edi
	leal	64(%ebx),%ebx			/* in += 64 */
	movl	%ebp,28(%eax)
	movl	(%esp),%ebp
	movl	%ecx,40(%eax)
	movl	160(%esp),%ecx
	movl	%esi,44(%eax)
	movl	%edx,52(%eax)
	movl	%edi,60(%eax)
	movl	%ebp,(%eax)			/* word 0 is stored last */
	leal	64(%eax),%eax			/* out += 64 */
	subl	$64,%ecx			/* len -= 64 */
	jnz	L003outer_loop
	jmp	L006done
L005tail:
	/* Partial final block: materialise the keystream block at 0..60(%esp). */
	addl	112(%esp),%edx
	addl	120(%esp),%edi
	movl	%eax,(%esp)
	movl	%ebp,16(%esp)
	movl	%ecx,32(%esp)
	movl	%esi,36(%esp)
	movl	%edx,48(%esp)
	movl	%edi,56(%esp)
	movl	4(%esp),%ebp
	movl	8(%esp),%ecx
	movl	12(%esp),%esi
	movl	20(%esp),%edx
	movl	24(%esp),%edi
	addl	$857760878,%ebp
	addl	$2036477234,%ecx
	addl	$1797285236,%esi
	addl	84(%esp),%edx
	addl	88(%esp),%edi
	movl	%ebp,4(%esp)
	movl	%ecx,8(%esp)
	movl	%esi,12(%esp)
	movl	%edx,20(%esp)
	movl	%edi,24(%esp)
	movl	28(%esp),%ebp
	movl	40(%esp),%ecx
	movl	44(%esp),%esi
	movl	52(%esp),%edx
	movl	60(%esp),%edi
	addl	92(%esp),%ebp
	addl	104(%esp),%ecx
	addl	108(%esp),%esi
	addl	116(%esp),%edx
	addl	124(%esp),%edi
	movl	%ebp,28(%esp)
	movl	156(%esp),%ebp			/* in */
	movl	%ecx,40(%esp)
	movl	152(%esp),%ecx			/* out */
	movl	%esi,44(%esp)
	xorl	%esi,%esi			/* byte index */
	movl	%edx,52(%esp)
	movl	%edi,60(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
L007tail_loop:
	/* out[i] = in[i] ^ keystream[i]; %ebx = bytes remaining (1..63). */
	movb	(%esi,%ebp,1),%al
	movb	(%esp,%esi,1),%dl
	leal	1(%esi),%esi
	xorb	%dl,%al
	movb	%al,-1(%ecx,%esi,1)
	decl	%ebx
	jnz	L007tail_loop
L006done:
	addl	$132,%esp
L000no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * _ChaCha20_ssse3(out, in, len, key, counter)          cdecl, same arguments
 * and register contract as _ChaCha20_ctr32; uses SSE2/SSSE3 instructions
 * and clobbers %xmm0-%xmm7.
 *
 * There are two ways in:
 *   - _ChaCha20_ctr32 jumps to Lssse3_shortcut with its own four pushes
 *     already done, len known to be non-zero, and %eax = address of
 *     Lpic_point.
 *   - A direct call to _ChaCha20_ssse3. This path establishes the same
 *     preconditions itself: it returns early for len == 0 (otherwise the
 *     byte-wise tail loop would be entered with a zero count) and computes
 *     the address of Lpic_point into %eax (the code at Lssse3_shortcut
 *     addresses Lssse3_data relative to that label).
 *
 * Frame (after alignment of %esp to 64 bytes):
 *     0..240(%esp)    working state, 16 vectors, addressed as -128..112(%ebx)
 *     256..496(%esp)  broadcast input state, addressed as -128..112(%ebp)
 *     512(%esp)       caller %esp to restore on exit
 *     516(%esp)       key pointer
 *     520(%esp)       counter pointer
 * In the 1x path only 0..48(%esp) are used, holding the four input rows.
 */
.globl	_ChaCha20_ssse3
.private_extern	_ChaCha20_ssse3
.align	4
_ChaCha20_ssse3:
L_ChaCha20_ssse3_begin:
	pushl	%ebp
	pushl	%ebx
	pushl	%esi
	pushl	%edi
	/* Direct-entry preamble (not executed when reached via the shortcut). */
	xorl	%eax,%eax
	cmpl	28(%esp),%eax			/* len == 0 ? */
	je	Lssse3_no_data
	call	Lssse3_pic_point
Lssse3_pic_point:
	popl	%eax				/* %eax = address of Lssse3_pic_point */
	leal	Lpic_point-Lssse3_pic_point(%eax),%eax	/* rebase to Lpic_point */
Lssse3_shortcut:
	movl	20(%esp),%edi			/* out */
	movl	24(%esp),%esi			/* in */
	movl	28(%esp),%ecx			/* len */
	movl	32(%esp),%edx			/* key */
	movl	36(%esp),%ebx			/* counter */
	movl	%esp,%ebp
	subl	$524,%esp
	andl	$-64,%esp
	movl	%ebp,512(%esp)			/* remember caller %esp */
	leal	Lssse3_data-Lpic_point(%eax),%eax	/* %eax = constant table */
	movdqu	(%ebx),%xmm3			/* counter block */
	cmpl	$256,%ecx
	jb	L0081x				/* short input: one block at a time */
	/* 4x path: four blocks per iteration, one block per 32-bit lane. */
	movl	%edx,516(%esp)
	movl	%ebx,520(%esp)
	subl	$256,%ecx
	leal	384(%esp),%ebp
	movdqu	(%edx),%xmm7
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	paddd	48(%eax),%xmm0			/* lane counters += {0,1,2,3} */
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	psubd	64(%eax),%xmm0			/* -4: the outer loop adds 4 first */
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,64(%ebp)
	movdqa	%xmm1,80(%ebp)
	movdqa	%xmm2,96(%ebp)
	movdqa	%xmm3,112(%ebp)
	movdqu	16(%edx),%xmm3
	movdqa	%xmm4,-64(%ebp)
	movdqa	%xmm5,-48(%ebp)
	movdqa	%xmm6,-32(%ebp)
	movdqa	%xmm7,-16(%ebp)
	movdqa	32(%eax),%xmm7			/* the four constant words */
	leal	128(%esp),%ebx
	pshufd	$0,%xmm3,%xmm0
	pshufd	$85,%xmm3,%xmm1
	pshufd	$170,%xmm3,%xmm2
	pshufd	$255,%xmm3,%xmm3
	pshufd	$0,%xmm7,%xmm4
	pshufd	$85,%xmm7,%xmm5
	pshufd	$170,%xmm7,%xmm6
	pshufd	$255,%xmm7,%xmm7
	movdqa	%xmm0,(%ebp)
	movdqa	%xmm1,16(%ebp)
	movdqa	%xmm2,32(%ebp)
	movdqa	%xmm3,48(%ebp)
	movdqa	%xmm4,-128(%ebp)
	movdqa	%xmm5,-112(%ebp)
	movdqa	%xmm6,-96(%ebp)
	movdqa	%xmm7,-80(%ebp)
	leal	128(%esi),%esi			/* bias in/out by 128 for short displacements */
	leal	128(%edi),%edi
	jmp	L009outer_loop
.align	4,0x90
L009outer_loop:
	/* Copy the broadcast input state into the working area. */
	movdqa	-112(%ebp),%xmm1
	movdqa	-96(%ebp),%xmm2
	movdqa	-80(%ebp),%xmm3
	movdqa	-48(%ebp),%xmm5
	movdqa	-32(%ebp),%xmm6
	movdqa	-16(%ebp),%xmm7
	movdqa	%xmm1,-112(%ebx)
	movdqa	%xmm2,-96(%ebx)
	movdqa	%xmm3,-80(%ebx)
	movdqa	%xmm5,-48(%ebx)
	movdqa	%xmm6,-32(%ebx)
	movdqa	%xmm7,-16(%ebx)
	movdqa	32(%ebp),%xmm2
	movdqa	48(%ebp),%xmm3
	movdqa	64(%ebp),%xmm4
	movdqa	80(%ebp),%xmm5
	movdqa	96(%ebp),%xmm6
	movdqa	112(%ebp),%xmm7
	paddd	64(%eax),%xmm4			/* advance all four lane counters by 4 */
	movdqa	%xmm2,32(%ebx)
	movdqa	%xmm3,48(%ebx)
	movdqa	%xmm4,64(%ebx)
	movdqa	%xmm5,80(%ebx)
	movdqa	%xmm6,96(%ebx)
	movdqa	%xmm7,112(%ebx)
	movdqa	%xmm4,64(%ebp)
	movdqa	-128(%ebp),%xmm0
	movdqa	%xmm4,%xmm6
	movdqa	-64(%ebp),%xmm3
	movdqa	(%ebp),%xmm4
	movdqa	16(%ebp),%xmm5
	movl	$10,%edx			/* 10 iterations of the round loop */
	nop
.align	4,0x90
L010loop:
	/* (%eax) and 16(%eax) are the pshufb masks for rotl 16 and rotl 8. */
	paddd	%xmm3,%xmm0
	movdqa	%xmm3,%xmm2
	pxor	%xmm0,%xmm6
	pshufb	(%eax),%xmm6
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-48(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	80(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,64(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-64(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	32(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-32(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	96(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,80(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,16(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-48(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	48(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-16(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	112(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,96(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-32(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	-48(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,%xmm6
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-16(%ebx)
	paddd	%xmm6,%xmm4
	pxor	%xmm4,%xmm2
	movdqa	-32(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-112(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	64(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-128(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,112(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	movdqa	%xmm4,32(%ebx)
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-48(%ebx)
	paddd	%xmm7,%xmm5
	movdqa	(%ebx),%xmm4
	pxor	%xmm5,%xmm3
	movdqa	-16(%ebx),%xmm2
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-96(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	80(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-112(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,64(%ebx)
	pxor	%xmm5,%xmm3
	paddd	%xmm2,%xmm0
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	pxor	%xmm0,%xmm6
	por	%xmm1,%xmm3
	movdqa	%xmm5,48(%ebx)
	pshufb	(%eax),%xmm6
	movdqa	%xmm3,-32(%ebx)
	paddd	%xmm6,%xmm4
	movdqa	16(%ebx),%xmm5
	pxor	%xmm4,%xmm2
	movdqa	-64(%ebx),%xmm3
	movdqa	%xmm2,%xmm1
	pslld	$12,%xmm2
	psrld	$20,%xmm1
	por	%xmm1,%xmm2
	movdqa	-80(%ebx),%xmm1
	paddd	%xmm2,%xmm0
	movdqa	96(%ebx),%xmm7
	pxor	%xmm0,%xmm6
	movdqa	%xmm0,-96(%ebx)
	pshufb	16(%eax),%xmm6
	paddd	%xmm6,%xmm4
	movdqa	%xmm6,80(%ebx)
	pxor	%xmm4,%xmm2
	paddd	%xmm3,%xmm1
	movdqa	%xmm2,%xmm0
	pslld	$7,%xmm2
	psrld	$25,%xmm0
	pxor	%xmm1,%xmm7
	por	%xmm0,%xmm2
	pshufb	(%eax),%xmm7
	movdqa	%xmm2,-16(%ebx)
	paddd	%xmm7,%xmm5
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm0
	pslld	$12,%xmm3
	psrld	$20,%xmm0
	por	%xmm0,%xmm3
	movdqa	-128(%ebx),%xmm0
	paddd	%xmm3,%xmm1
	movdqa	64(%ebx),%xmm6
	pxor	%xmm1,%xmm7
	movdqa	%xmm1,-80(%ebx)
	pshufb	16(%eax),%xmm7
	paddd	%xmm7,%xmm5
	movdqa	%xmm7,96(%ebx)
	pxor	%xmm5,%xmm3
	movdqa	%xmm3,%xmm1
	pslld	$7,%xmm3
	psrld	$25,%xmm1
	por	%xmm1,%xmm3
	decl	%edx
	jnz	L010loop
	/* Flush the values still held in registers back to the working area. */
	movdqa	%xmm3,-64(%ebx)
	movdqa	%xmm4,(%ebx)
	movdqa	%xmm5,16(%ebx)
	movdqa	%xmm6,64(%ebx)
	movdqa	%xmm7,96(%ebx)
	/*
	 * Four passes follow, one per group of four state words: add the
	 * input state, transpose the 4x4 dwords so each register holds 16
	 * contiguous bytes of one block, XOR with the input and store.
	 * Blocks are 64 bytes apart; %esi/%edi advance 16 per pass and 208
	 * on the last one (3*16 + 208 = 256).
	 */
	movdqa	-112(%ebx),%xmm1
	movdqa	-96(%ebx),%xmm2
	movdqa	-80(%ebx),%xmm3
	paddd	-128(%ebp),%xmm0
	paddd	-112(%ebp),%xmm1
	paddd	-96(%ebp),%xmm2
	paddd	-80(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	-64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	-48(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	-32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	-16(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	-64(%ebp),%xmm0
	paddd	-48(%ebp),%xmm1
	paddd	-32(%ebp),%xmm2
	paddd	-16(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	16(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	32(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	48(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	(%ebp),%xmm0
	paddd	16(%ebp),%xmm1
	paddd	32(%ebp),%xmm2
	paddd	48(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	16(%esi),%esi
	pxor	%xmm0,%xmm4
	movdqa	64(%ebx),%xmm0
	pxor	%xmm1,%xmm5
	movdqa	80(%ebx),%xmm1
	pxor	%xmm2,%xmm6
	movdqa	96(%ebx),%xmm2
	pxor	%xmm3,%xmm7
	movdqa	112(%ebx),%xmm3
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	16(%edi),%edi
	paddd	64(%ebp),%xmm0
	paddd	80(%ebp),%xmm1
	paddd	96(%ebp),%xmm2
	paddd	112(%ebp),%xmm3
	movdqa	%xmm0,%xmm6
	punpckldq	%xmm1,%xmm0
	movdqa	%xmm2,%xmm7
	punpckldq	%xmm3,%xmm2
	punpckhdq	%xmm1,%xmm6
	punpckhdq	%xmm3,%xmm7
	movdqa	%xmm0,%xmm1
	punpcklqdq	%xmm2,%xmm0
	movdqa	%xmm6,%xmm3
	punpcklqdq	%xmm7,%xmm6
	punpckhqdq	%xmm2,%xmm1
	punpckhqdq	%xmm7,%xmm3
	movdqu	-128(%esi),%xmm4
	movdqu	-64(%esi),%xmm5
	movdqu	(%esi),%xmm2
	movdqu	64(%esi),%xmm7
	leal	208(%esi),%esi
	pxor	%xmm0,%xmm4
	pxor	%xmm1,%xmm5
	pxor	%xmm2,%xmm6
	pxor	%xmm3,%xmm7
	movdqu	%xmm4,-128(%edi)
	movdqu	%xmm5,-64(%edi)
	movdqu	%xmm6,(%edi)
	movdqu	%xmm7,64(%edi)
	leal	208(%edi),%edi
	subl	$256,%ecx
	jnc	L009outer_loop			/* at least 256 bytes were still pending */
	addl	$256,%ecx			/* undo the bias: %ecx = bytes left (0..255) */
	jz	L011done
	/* Hand the remainder to the 1x path: rebuild its counter block. */
	movl	520(%esp),%ebx			/* counter pointer */
	leal	-128(%esi),%esi			/* remove the +128 bias */
	movl	516(%esp),%edx			/* key pointer */
	leal	-128(%edi),%edi
	movd	64(%ebp),%xmm2			/* lane-0 counter of the last batch */
	movdqu	(%ebx),%xmm3
	paddd	96(%eax),%xmm2			/* + {4,0,0,0} */
	pand	112(%eax),%xmm3			/* keep words 1..3 of the caller block */
	por	%xmm2,%xmm3
L0081x:
	/* 1x path: one 64-byte block per iteration, rows in %xmm0..%xmm3. */
	movdqa	32(%eax),%xmm0
	movdqu	(%edx),%xmm1
	movdqu	16(%edx),%xmm2
	movdqa	(%eax),%xmm6			/* rotl-16 mask */
	movdqa	16(%eax),%xmm7			/* rotl-8 mask */
	movl	%ebp,48(%esp)			/* overwritten by the movdqa to 48(%esp) below */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	movl	$10,%edx
	jmp	L012loop1x
.align	4,0x90
L013outer1x:
	movdqa	80(%eax),%xmm3			/* {1,0,0,0} */
	movdqa	(%esp),%xmm0
	movdqa	16(%esp),%xmm1
	movdqa	32(%esp),%xmm2
	paddd	48(%esp),%xmm3			/* counter += 1 */
	movl	$10,%edx
	movdqa	%xmm3,48(%esp)
	jmp	L012loop1x
.align	4,0x90
L012loop1x:
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222				/* pshufb %xmm6,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223				/* pshufb %xmm7,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* Rotate rows 1..3 so the next half operates on diagonals. */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$57,%xmm1,%xmm1
	pshufd	$147,%xmm3,%xmm3
	nop
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,222				/* pshufb %xmm6,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$20,%xmm1
	pslld	$12,%xmm4
	por	%xmm4,%xmm1
	paddd	%xmm1,%xmm0
	pxor	%xmm0,%xmm3
.byte	102,15,56,0,223				/* pshufb %xmm7,%xmm3 */
	paddd	%xmm3,%xmm2
	pxor	%xmm2,%xmm1
	movdqa	%xmm1,%xmm4
	psrld	$25,%xmm1
	pslld	$7,%xmm4
	por	%xmm4,%xmm1
	/* Rotate the rows back. */
	pshufd	$78,%xmm2,%xmm2
	pshufd	$147,%xmm1,%xmm1
	pshufd	$57,%xmm3,%xmm3
	decl	%edx
	jnz	L012loop1x
	/* Add the input state to obtain the keystream block. */
	paddd	(%esp),%xmm0
	paddd	16(%esp),%xmm1
	paddd	32(%esp),%xmm2
	paddd	48(%esp),%xmm3
	cmpl	$64,%ecx
	jb	L014tail
	movdqu	(%esi),%xmm4
	movdqu	16(%esi),%xmm5
	pxor	%xmm4,%xmm0
	movdqu	32(%esi),%xmm4
	pxor	%xmm5,%xmm1
	movdqu	48(%esi),%xmm5
	pxor	%xmm4,%xmm2
	pxor	%xmm5,%xmm3
	leal	64(%esi),%esi
	movdqu	%xmm0,(%edi)
	movdqu	%xmm1,16(%edi)
	movdqu	%xmm2,32(%edi)
	movdqu	%xmm3,48(%edi)
	leal	64(%edi),%edi
	subl	$64,%ecx
	jnz	L013outer1x
	jmp	L011done
L014tail:
	/* Partial final block: spill the keystream and XOR byte by byte. */
	movdqa	%xmm0,(%esp)
	movdqa	%xmm1,16(%esp)
	movdqa	%xmm2,32(%esp)
	movdqa	%xmm3,48(%esp)
	xorl	%eax,%eax
	xorl	%edx,%edx
	xorl	%ebp,%ebp			/* byte index */
L015tail_loop:
	/* %ecx = bytes remaining; must be non-zero on entry. */
	movb	(%esp,%ebp,1),%al
	movb	(%esi,%ebp,1),%dl
	leal	1(%ebp),%ebp
	xorb	%dl,%al
	movb	%al,-1(%edi,%ebp,1)
	decl	%ecx
	jnz	L015tail_loop
L011done:
	movl	512(%esp),%esp			/* drop the aligned frame */
Lssse3_no_data:
	popl	%edi
	popl	%esi
	popl	%ebx
	popl	%ebp
	ret
/*
 * Constant table, addressed relative to Lpic_point:
 *     +0    pshufb mask: rotate each dword left by 16
 *     +16   pshufb mask: rotate each dword left by 8
 *     +32   the four constant words ("expand 32-byte k")
 *     +48   {0,1,2,3}    per-lane counter offsets for the 4x path
 *     +64   {4,4,4,4}    4x counter step
 *     +80   {1,0,0,0}    1x counter step
 *     +96   {4,0,0,0}    counter fix-up when leaving the 4x path
 *     +112  {0,-1,-1,-1} mask that clears counter word 0
 */
.align	6,0x90
Lssse3_data:
.byte	2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte	3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long	1634760805,857760878,2036477234,1797285236
.long	0,1,2,3
.long	4,4,4,4
.long	1,0,0,0
.long	4,0,0,0
.long	0,-1,-1,-1
.align	6,0x90
/* ASCII: "ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>", NUL-terminated. */
.byte	67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte	44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte	60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte	114,103,62,0
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_OPENSSL_ia32cap_P$non_lazy_ptr:
.indirect_symbol	_OPENSSL_ia32cap_P
.long	0
#endif