# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text
# _GFp_ChaCha20_ctr32: entry point taking five arguments on the stack.
# After the four register pushes below the arguments are found at
#   arg0 at 20(%esp): output pointer
#   arg1 at 24(%esp): input pointer
#   arg2 at 28(%esp): byte count
#   arg3 at 32(%esp): pointer to 32 bytes (key words)
#   arg4 at 36(%esp): pointer to 16 bytes whose first word is a block counter
# Returns at once when the byte count is zero. Jumps into the SSSE3 code when
# both capability bits tested below are set, otherwise runs the scalar code.
# The registers ebp, ebx, esi and edi are saved on entry and restored on exit.
.globl _GFp_ChaCha20_ctr32
.private_extern _GFp_ChaCha20_ctr32
.align 4
_GFp_ChaCha20_ctr32:
L_GFp_ChaCha20_ctr32_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    xorl %eax,%eax
    cmpl 28(%esp),%eax
    je L000no_data
# Fetch the run-time address of Lpic_point into eax so that data can be
# addressed position-independently. The SSSE3 code relies on this value too.
    call Lpic_point
Lpic_point:
    popl %eax
    movl L_GFp_ia32cap_P$non_lazy_ptr-Lpic_point(%eax),%ebp
# Bit 24 of capability word 0 and bit 9 of capability word 1 must both be set
# to take the SSSE3 path (presumably the FXSR and SSSE3 CPUID bits; confirm
# against the definition of the capability vector).
    testl $16777216,(%ebp)
    jz L001x86
    testl $512,4(%ebp)
    jz L001x86
    jmp Lssse3_shortcut
# Scalar path. A 132-byte frame is reserved:
#   offsets 0..60    working state of the current block
#   offsets 80..108  copy of the 32 bytes read through arg3
#   offsets 112..124 copy of the 16 bytes read through arg4
#   offset 128       round-loop counter
# With the frame in place the arguments sit at 152 (output), 156 (input)
# and 160 (byte count).
L001x86:
    movl 32(%esp),%esi
    movl 36(%esp),%edi
    subl $132,%esp
    movl (%esi),%eax
    movl 4(%esi),%ebx
    movl 8(%esi),%ecx
    movl 12(%esi),%edx
    movl %eax,80(%esp)
    movl %ebx,84(%esp)
    movl %ecx,88(%esp)
    movl %edx,92(%esp)
    movl 16(%esi),%eax
    movl 20(%esi),%ebx
    movl 24(%esi),%ecx
    movl 28(%esi),%edx
    movl %eax,96(%esp)
    movl %ebx,100(%esp)
    movl %ecx,104(%esp)
    movl %edx,108(%esp)
    movl (%edi),%eax
    movl 4(%edi),%ebx
    movl 8(%edi),%ecx
    movl 12(%edi),%edx
# The counter is stored minus one because L002entry adds one before each block.
    subl $1,%eax
    movl %eax,112(%esp)
    movl %ebx,116(%esp)
    movl %ecx,120(%esp)
    movl %edx,124(%esp)
    jmp L002entry
.align 4,0x90
# Reached after each full 64-byte block: write back the advanced input
# pointer (ebx), output pointer (eax) and remaining byte count (ecx).
L003outer_loop:
    movl %ebx,156(%esp)
    movl %eax,152(%esp)
    movl %ecx,160(%esp)
# Build the initial state for one block. The four immediates are the
# little-endian words of the ASCII text: expand 32-byte k
L002entry:
    movl $1634760805,%eax
    movl $857760878,4(%esp)
    movl $2036477234,8(%esp)
    movl $1797285236,12(%esp)
    movl 84(%esp),%ebx
    movl 88(%esp),%ebp
    movl 104(%esp),%ecx
    movl 108(%esp),%esi
    movl 116(%esp),%edx
    movl 120(%esp),%edi
    movl %ebx,20(%esp)
    movl %ebp,24(%esp)
    movl %ecx,40(%esp)
    movl %esi,44(%esp)
    movl %edx,52(%esp)
    movl %edi,56(%esp)
    movl 92(%esp),%ebx
    movl 124(%esp),%edi
    movl 112(%esp),%edx
    movl 80(%esp),%ebp
    movl 96(%esp),%ecx
    movl 100(%esp),%esi
    addl $1,%edx
    movl %ebx,28(%esp)
    movl %edi,60(%esp)
    movl %edx,112(%esp)
# Ten passes through L004loop; each pass contains eight add/xor/rotate
# groups using the rotation amounts 16, 12, 8 and 7.
    movl $10,%ebx
    jmp L004loop
.align 4,0x90
L004loop:
    addl %ebp,%eax
    movl %ebx,128(%esp)
    movl %ebp,%ebx
    xorl %eax,%edx
    roll $16,%edx
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 52(%esp),%edi
    roll $12,%ebx
    movl 20(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,48(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,32(%esp)
    roll $16,%edi
    movl %ebx,16(%esp)
    addl %edi,%esi
    movl 40(%esp),%ecx
    xorl %esi,%ebp
    movl 56(%esp),%edx
    roll $12,%ebp
    movl 24(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,52(%esp)
    xorl %esi,%ebp
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,36(%esp)
    roll $16,%edx
    movl %ebp,20(%esp)
    addl %edx,%ecx
    movl 44(%esp),%esi
    xorl %ecx,%ebx
    movl 60(%esp),%edi
    roll $12,%ebx
    movl 28(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,56(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,24(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    roll $12,%ebp
    movl 20(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,%edx
    xorl %esi,%ebp
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    roll $16,%edx
    movl %ebp,28(%esp)
    addl %edx,%ecx
    xorl %ecx,%ebx
    movl 48(%esp),%edi
    roll $12,%ebx
    movl 24(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,(%esp)
    roll $8,%edx
    movl 4(%esp),%eax
    addl %edx,%ecx
    movl %edx,60(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    movl %ecx,40(%esp)
    roll $16,%edi
    movl %ebx,20(%esp)
    addl %edi,%esi
    movl 32(%esp),%ecx
    xorl %esi,%ebp
    movl 52(%esp),%edx
    roll $12,%ebp
    movl 28(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,4(%esp)
    roll $8,%edi
    movl 8(%esp),%eax
    addl %edi,%esi
    movl %edi,48(%esp)
    xorl %esi,%ebp
    addl %ebx,%eax
    roll $7,%ebp
    xorl %eax,%edx
    movl %esi,44(%esp)
    roll $16,%edx
    movl %ebp,24(%esp)
    addl %edx,%ecx
    movl 36(%esp),%esi
    xorl %ecx,%ebx
    movl 56(%esp),%edi
    roll $12,%ebx
    movl 16(%esp),%ebp
    addl %ebx,%eax
    xorl %eax,%edx
    movl %eax,8(%esp)
    roll $8,%edx
    movl 12(%esp),%eax
    addl %edx,%ecx
    movl %edx,52(%esp)
    xorl %ecx,%ebx
    addl %ebp,%eax
    roll $7,%ebx
    xorl %eax,%edi
    roll $16,%edi
    movl %ebx,28(%esp)
    addl %edi,%esi
    xorl %esi,%ebp
    movl 48(%esp),%edx
    roll $12,%ebp
    movl 128(%esp),%ebx
    addl %ebp,%eax
    xorl %eax,%edi
    movl %eax,12(%esp)
    roll $8,%edi
    movl (%esp),%eax
    addl %edi,%esi
    movl %edi,56(%esp)
    xorl %esi,%ebp
    roll $7,%ebp
    decl %ebx
    jnz L004loop
# Rounds finished: add the saved input state back to the working state.
# With fewer than 64 bytes remaining the block is finished in L005tail.
    movl 160(%esp),%ebx
    addl $1634760805,%eax
    addl 80(%esp),%ebp
    addl 96(%esp),%ecx
    addl 100(%esp),%esi
    cmpl $64,%ebx
    jb L005tail
# Full block: XOR the 16 result words with the input and store to the output.
    movl 156(%esp),%ebx
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    xorl (%ebx),%eax
    xorl 16(%ebx),%ebp
    movl %eax,(%esp)
    movl 152(%esp),%eax
    xorl 32(%ebx),%ecx
    xorl 36(%ebx),%esi
    xorl 48(%ebx),%edx
    xorl 56(%ebx),%edi
    movl %ebp,16(%eax)
    movl %ecx,32(%eax)
    movl %esi,36(%eax)
    movl %edx,48(%eax)
    movl %edi,56(%eax)
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    xorl 4(%ebx),%ebp
    xorl 8(%ebx),%ecx
    xorl 12(%ebx),%esi
    xorl 20(%ebx),%edx
    xorl 24(%ebx),%edi
    movl %ebp,4(%eax)
    movl %ecx,8(%eax)
    movl %esi,12(%eax)
    movl %edx,20(%eax)
    movl %edi,24(%eax)
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    xorl 28(%ebx),%ebp
    xorl 40(%ebx),%ecx
    xorl 44(%ebx),%esi
    xorl 52(%ebx),%edx
    xorl 60(%ebx),%edi
    leal 64(%ebx),%ebx
    movl %ebp,28(%eax)
    movl (%esp),%ebp
    movl %ecx,40(%eax)
    movl 160(%esp),%ecx
    movl %esi,44(%eax)
    movl %edx,52(%eax)
    movl %edi,60(%eax)
    movl %ebp,(%eax)
    leal 64(%eax),%eax
    subl $64,%ecx
    jnz L003outer_loop
    jmp L006done
# Partial final block: complete the 64-byte result at offsets 0..60 of the
# frame, then XOR it into the output one byte at a time. On arrival ebx
# holds the remaining byte count (1..63).
L005tail:
    addl 112(%esp),%edx
    addl 120(%esp),%edi
    movl %eax,(%esp)
    movl %ebp,16(%esp)
    movl %ecx,32(%esp)
    movl %esi,36(%esp)
    movl %edx,48(%esp)
    movl %edi,56(%esp)
    movl 4(%esp),%ebp
    movl 8(%esp),%ecx
    movl 12(%esp),%esi
    movl 20(%esp),%edx
    movl 24(%esp),%edi
    addl $857760878,%ebp
    addl $2036477234,%ecx
    addl $1797285236,%esi
    addl 84(%esp),%edx
    addl 88(%esp),%edi
    movl %ebp,4(%esp)
    movl %ecx,8(%esp)
    movl %esi,12(%esp)
    movl %edx,20(%esp)
    movl %edi,24(%esp)
    movl 28(%esp),%ebp
    movl 40(%esp),%ecx
    movl 44(%esp),%esi
    movl 52(%esp),%edx
    movl 60(%esp),%edi
    addl 92(%esp),%ebp
    addl 104(%esp),%ecx
    addl 108(%esp),%esi
    addl 116(%esp),%edx
    addl 124(%esp),%edi
    movl %ebp,28(%esp)
    movl 156(%esp),%ebp
    movl %ecx,40(%esp)
    movl 152(%esp),%ecx
    movl %esi,44(%esp)
    xorl %esi,%esi
    movl %edx,52(%esp)
    movl %edi,60(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
# Here ebp is the input pointer, ecx the output pointer and esi the byte index.
L007tail_loop:
    movb (%esi,%ebp,1),%al
    movb (%esp,%esi,1),%dl
    leal 1(%esi),%esi
    xorb %dl,%al
    movb %al,-1(%ecx,%esi,1)
    decl %ebx
    jnz L007tail_loop
L006done:
    addl $132,%esp
L000no_data:
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
# __ChaCha20_ssse3: SSSE3 code with the same stack argument layout as above.
# NOTE(review): the body reads eax as the address of Lpic_point but nothing
# between the label __ChaCha20_ssse3 and that use sets eax; within this file
# the code is only reached through the jump to Lssse3_shortcut, which
# arrives with eax already set. Confirm there are no direct callers.
.private_extern __ChaCha20_ssse3
.align 4
__ChaCha20_ssse3:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
Lssse3_shortcut:
    movl 20(%esp),%edi
    movl 24(%esp),%esi
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl 36(%esp),%ebx
# Reserve a 64-byte aligned frame and keep the previous stack pointer at
# offset 512 of it so that L011done can restore it.
    movl %esp,%ebp
    subl $524,%esp
    andl $-64,%esp
    movl %ebp,512(%esp)
    leal Lssse3_data-Lpic_point(%eax),%eax
    movdqu (%ebx),%xmm3
# Inputs shorter than 256 bytes go straight to the single-block code.
    cmpl $256,%ecx
    jb L0081x
# Four-block path. Each state word is broadcast across one vector so that
# four blocks are processed in parallel. The counter lanes get 0,1,2,3
# added (table offset 48) and 4 subtracted (table offset 64) because the
# outer loop adds 4 before every use.
    movl %edx,516(%esp)
    movl %ebx,520(%esp)
    subl $256,%ecx
    leal 384(%esp),%ebp
    movdqu (%edx),%xmm7
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    paddd 48(%eax),%xmm0
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    psubd 64(%eax),%xmm0
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    movdqa %xmm0,64(%ebp)
    movdqa %xmm1,80(%ebp)
    movdqa %xmm2,96(%ebp)
    movdqa %xmm3,112(%ebp)
    movdqu 16(%edx),%xmm3
    movdqa %xmm4,-64(%ebp)
    movdqa %xmm5,-48(%ebp)
    movdqa %xmm6,-32(%ebp)
    movdqa %xmm7,-16(%ebp)
    movdqa 32(%eax),%xmm7
    leal 128(%esp),%ebx
    pshufd $0,%xmm3,%xmm0
    pshufd $85,%xmm3,%xmm1
    pshufd $170,%xmm3,%xmm2
    pshufd $255,%xmm3,%xmm3
    pshufd $0,%xmm7,%xmm4
    pshufd $85,%xmm7,%xmm5
    pshufd $170,%xmm7,%xmm6
    pshufd $255,%xmm7,%xmm7
    movdqa %xmm0,(%ebp)
    movdqa %xmm1,16(%ebp)
    movdqa %xmm2,32(%ebp)
    movdqa %xmm3,48(%ebp)
    movdqa %xmm4,-128(%ebp)
    movdqa %xmm5,-112(%ebp)
    movdqa %xmm6,-96(%ebp)
    movdqa %xmm7,-80(%ebp)
# Bias both data pointers by 128 so the loads and stores below can use
# displacements in the range -128..64.
    leal 128(%esi),%esi
    leal 128(%edi),%edi
    jmp L009outer_loop
.align 4,0x90
# Per 256 bytes: copy the broadcast input state (addressed through ebp)
# into the working area (addressed through ebx) and advance the counter
# lanes by 4.
L009outer_loop:
    movdqa -112(%ebp),%xmm1
    movdqa -96(%ebp),%xmm2
    movdqa -80(%ebp),%xmm3
    movdqa -48(%ebp),%xmm5
    movdqa -32(%ebp),%xmm6
    movdqa -16(%ebp),%xmm7
    movdqa %xmm1,-112(%ebx)
    movdqa %xmm2,-96(%ebx)
    movdqa %xmm3,-80(%ebx)
    movdqa %xmm5,-48(%ebx)
    movdqa %xmm6,-32(%ebx)
    movdqa %xmm7,-16(%ebx)
    movdqa 32(%ebp),%xmm2
    movdqa 48(%ebp),%xmm3
    movdqa 64(%ebp),%xmm4
    movdqa 80(%ebp),%xmm5
    movdqa 96(%ebp),%xmm6
    movdqa 112(%ebp),%xmm7
    paddd 64(%eax),%xmm4
    movdqa %xmm2,32(%ebx)
    movdqa %xmm3,48(%ebx)
    movdqa %xmm4,64(%ebx)
    movdqa %xmm5,80(%ebx)
    movdqa %xmm6,96(%ebx)
    movdqa %xmm7,112(%ebx)
    movdqa %xmm4,64(%ebp)
    movdqa -128(%ebp),%xmm0
    movdqa %xmm4,%xmm6
    movdqa -64(%ebp),%xmm3
    movdqa (%ebp),%xmm4
    movdqa 16(%ebp),%xmm5
    movl $10,%edx
    nop
.align 4,0x90
# Ten passes. Rotations by 16 and by 8 use pshufb with the masks at table
# offsets 0 and 16; rotations by 12 and by 7 use a shift pair plus por.
L010loop:
    paddd %xmm3,%xmm0
    movdqa %xmm3,%xmm2
    pxor %xmm0,%xmm6
    pshufb (%eax),%xmm6
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -48(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 80(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,64(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-64(%ebx)
    paddd %xmm7,%xmm5
    movdqa 32(%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -32(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 96(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,80(%ebx)
    pxor %xmm5,%xmm3
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,16(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-48(%ebx)
    paddd %xmm6,%xmm4
    movdqa 48(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -16(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 112(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,96(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-32(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa -48(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,%xmm6
    pxor %xmm5,%xmm3
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    pshufb (%eax),%xmm6
    movdqa %xmm3,-16(%ebx)
    paddd %xmm6,%xmm4
    pxor %xmm4,%xmm2
    movdqa -32(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -112(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 64(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-128(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,112(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    movdqa %xmm4,32(%ebx)
    pshufb (%eax),%xmm7
    movdqa %xmm2,-48(%ebx)
    paddd %xmm7,%xmm5
    movdqa (%ebx),%xmm4
    pxor %xmm5,%xmm3
    movdqa -16(%ebx),%xmm2
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -96(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 80(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-112(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,64(%ebx)
    pxor %xmm5,%xmm3
    paddd %xmm2,%xmm0
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    pxor %xmm0,%xmm6
    por %xmm1,%xmm3
    movdqa %xmm5,48(%ebx)
    pshufb (%eax),%xmm6
    movdqa %xmm3,-32(%ebx)
    paddd %xmm6,%xmm4
    movdqa 16(%ebx),%xmm5
    pxor %xmm4,%xmm2
    movdqa -64(%ebx),%xmm3
    movdqa %xmm2,%xmm1
    pslld $12,%xmm2
    psrld $20,%xmm1
    por %xmm1,%xmm2
    movdqa -80(%ebx),%xmm1
    paddd %xmm2,%xmm0
    movdqa 96(%ebx),%xmm7
    pxor %xmm0,%xmm6
    movdqa %xmm0,-96(%ebx)
    pshufb 16(%eax),%xmm6
    paddd %xmm6,%xmm4
    movdqa %xmm6,80(%ebx)
    pxor %xmm4,%xmm2
    paddd %xmm3,%xmm1
    movdqa %xmm2,%xmm0
    pslld $7,%xmm2
    psrld $25,%xmm0
    pxor %xmm1,%xmm7
    por %xmm0,%xmm2
    pshufb (%eax),%xmm7
    movdqa %xmm2,-16(%ebx)
    paddd %xmm7,%xmm5
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm0
    pslld $12,%xmm3
    psrld $20,%xmm0
    por %xmm0,%xmm3
    movdqa -128(%ebx),%xmm0
    paddd %xmm3,%xmm1
    movdqa 64(%ebx),%xmm6
    pxor %xmm1,%xmm7
    movdqa %xmm1,-80(%ebx)
    pshufb 16(%eax),%xmm7
    paddd %xmm7,%xmm5
    movdqa %xmm7,96(%ebx)
    pxor %xmm5,%xmm3
    movdqa %xmm3,%xmm1
    pslld $7,%xmm3
    psrld $25,%xmm1
    por %xmm1,%xmm3
    decl %edx
    jnz L010loop
# Rounds finished. The working state is processed in four groups of four
# vectors: add the saved input state, transpose each group with the
# punpck instructions, XOR with 16 input bytes for each of the four
# blocks and store to the output.
    movdqa %xmm3,-64(%ebx)
    movdqa %xmm4,(%ebx)
    movdqa %xmm5,16(%ebx)
    movdqa %xmm6,64(%ebx)
    movdqa %xmm7,96(%ebx)
    movdqa -112(%ebx),%xmm1
    movdqa -96(%ebx),%xmm2
    movdqa -80(%ebx),%xmm3
    paddd -128(%ebp),%xmm0
    paddd -112(%ebp),%xmm1
    paddd -96(%ebp),%xmm2
    paddd -80(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa -64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa -48(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa -32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa -16(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd -64(%ebp),%xmm0
    paddd -48(%ebp),%xmm1
    paddd -32(%ebp),%xmm2
    paddd -16(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa (%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 16(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 32(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 48(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd (%ebp),%xmm0
    paddd 16(%ebp),%xmm1
    paddd 32(%ebp),%xmm2
    paddd 48(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
    leal 16(%esi),%esi
    pxor %xmm0,%xmm4
    movdqa 64(%ebx),%xmm0
    pxor %xmm1,%xmm5
    movdqa 80(%ebx),%xmm1
    pxor %xmm2,%xmm6
    movdqa 96(%ebx),%xmm2
    pxor %xmm3,%xmm7
    movdqa 112(%ebx),%xmm3
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 16(%edi),%edi
    paddd 64(%ebp),%xmm0
    paddd 80(%ebp),%xmm1
    paddd 96(%ebp),%xmm2
    paddd 112(%ebp),%xmm3
    movdqa %xmm0,%xmm6
    punpckldq %xmm1,%xmm0
    movdqa %xmm2,%xmm7
    punpckldq %xmm3,%xmm2
    punpckhdq %xmm1,%xmm6
    punpckhdq %xmm3,%xmm7
    movdqa %xmm0,%xmm1
    punpcklqdq %xmm2,%xmm0
    movdqa %xmm6,%xmm3
    punpcklqdq %xmm7,%xmm6
    punpckhqdq %xmm2,%xmm1
    punpckhqdq %xmm7,%xmm3
    movdqu -128(%esi),%xmm4
    movdqu -64(%esi),%xmm5
    movdqu (%esi),%xmm2
    movdqu 64(%esi),%xmm7
# The three earlier steps of 16 plus this step of 208 advance each
# pointer by 256 in total.
    leal 208(%esi),%esi
    pxor %xmm0,%xmm4
    pxor %xmm1,%xmm5
    pxor %xmm2,%xmm6
    pxor %xmm3,%xmm7
    movdqu %xmm4,-128(%edi)
    movdqu %xmm5,-64(%edi)
    movdqu %xmm6,(%edi)
    movdqu %xmm7,64(%edi)
    leal 208(%edi),%edi
    subl $256,%ecx
    jnc L009outer_loop
# Fewer than 256 bytes remain. Undo the last subtraction, remove the 128
# byte pointer bias and rebuild a single counter block in xmm3: the low
# word is the lane-0 counter plus 4 (table offset 96), the other three
# words come from the original 16 bytes (mask at table offset 112).
    addl $256,%ecx
    jz L011done
    movl 520(%esp),%ebx
    leal -128(%esi),%esi
    movl 516(%esp),%edx
    leal -128(%edi),%edi
    movd 64(%ebp),%xmm2
    movdqu (%ebx),%xmm3
    paddd 96(%eax),%xmm2
    pand 112(%eax),%xmm3
    por %xmm2,%xmm3
# Single-block path. Register roles: xmm0 constants, xmm1 and xmm2 the 32
# bytes read through edx, xmm3 counter block, xmm6 and xmm7 the two pshufb
# masks. The input state is kept at offsets 0..63 of the frame.
L0081x:
    movdqa 32(%eax),%xmm0
    movdqu (%edx),%xmm1
    movdqu 16(%edx),%xmm2
    movdqa (%eax),%xmm6
    movdqa 16(%eax),%xmm7
    movl %ebp,48(%esp)
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    movl $10,%edx
    jmp L012loop1x
.align 4,0x90
# Next 64-byte block: reload the input state and add the vector at table
# offset 80 (1,0,0,0) to the stored counter block.
L013outer1x:
    movdqa 80(%eax),%xmm3
    movdqa (%esp),%xmm0
    movdqa 16(%esp),%xmm1
    movdqa 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    movl $10,%edx
    movdqa %xmm3,48(%esp)
    jmp L012loop1x
.align 4,0x90
# The .byte sequences below are hand-encoded pshufb instructions:
#   the sequence 102,15,56,0,222 encodes pshufb %xmm6,%xmm3
#   the sequence 102,15,56,0,223 encodes pshufb %xmm7,%xmm3
L012loop1x:
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    pshufd $78,%xmm2,%xmm2
    pshufd $57,%xmm1,%xmm1
    pshufd $147,%xmm3,%xmm3
    nop
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,222
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $20,%xmm1
    pslld $12,%xmm4
    por %xmm4,%xmm1
    paddd %xmm1,%xmm0
    pxor %xmm0,%xmm3
.byte 102,15,56,0,223
    paddd %xmm3,%xmm2
    pxor %xmm2,%xmm1
    movdqa %xmm1,%xmm4
    psrld $25,%xmm1
    pslld $7,%xmm4
    por %xmm4,%xmm1
    pshufd $78,%xmm2,%xmm2
    pshufd $147,%xmm1,%xmm1
    pshufd $57,%xmm3,%xmm3
    decl %edx
    jnz L012loop1x
    paddd (%esp),%xmm0
    paddd 16(%esp),%xmm1
    paddd 32(%esp),%xmm2
    paddd 48(%esp),%xmm3
    cmpl $64,%ecx
    jb L014tail
    movdqu (%esi),%xmm4
    movdqu 16(%esi),%xmm5
    pxor %xmm4,%xmm0
    movdqu 32(%esi),%xmm4
    pxor %xmm5,%xmm1
    movdqu 48(%esi),%xmm5
    pxor %xmm4,%xmm2
    pxor %xmm5,%xmm3
    leal 64(%esi),%esi
    movdqu %xmm0,(%edi)
    movdqu %xmm1,16(%edi)
    movdqu %xmm2,32(%edi)
    movdqu %xmm3,48(%edi)
    leal 64(%edi),%edi
    subl $64,%ecx
    jnz L013outer1x
    jmp L011done
# Partial final block: spill the 64 result bytes to the frame and XOR the
# remaining ecx bytes one at a time.
L014tail:
    movdqa %xmm0,(%esp)
    movdqa %xmm1,16(%esp)
    movdqa %xmm2,32(%esp)
    movdqa %xmm3,48(%esp)
    xorl %eax,%eax
    xorl %edx,%edx
    xorl %ebp,%ebp
L015tail_loop:
    movb (%esp,%ebp,1),%al
    movb (%esi,%ebp,1),%dl
    leal 1(%ebp),%ebp
    xorb %dl,%al
    movb %al,-1(%edi,%ebp,1)
    decl %ecx
    jnz L015tail_loop
L011done:
    movl 512(%esp),%esp
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret
.align 6,0x90
# Constant table addressed through eax in the SSSE3 code:
#   offset 0    pshufb mask rotating every 32-bit word left by 16
#   offset 16   pshufb mask rotating every 32-bit word left by 8
#   offset 32   the four constant words
#   offset 48   per-lane counter offsets 0,1,2,3
#   offset 64   four-block counter step 4,4,4,4
#   offset 80   single-block counter step 1,0,0,0
#   offset 96   value 4,0,0,0 added when leaving the four-block path
#   offset 112  mask keeping the upper three words of the counter block
Lssse3_data:
.byte 2,3,0,1,6,7,4,5,10,11,8,9,14,15,12,13
.byte 3,0,1,2,7,4,5,6,11,8,9,10,15,12,13,14
.long 1634760805,857760878,2036477234,1797285236
.long 0,1,2,3
.long 4,4,4,4
.long 1,0,0,0
.long 4,0,0,0
.long 0,-1,-1,-1
.align 6,0x90
# NUL-terminated ASCII identification string:
# ChaCha20 for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 67,104,97,67,104,97,50,48,32,102,111,114,32,120,56,54
.byte 44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32
.byte 60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111
.byte 114,103,62,0
# Non-lazy pointer slot through which the capability vector is reached.
.section __IMPORT,__pointers,non_lazy_symbol_pointers
L_GFp_ia32cap_P$non_lazy_ptr:
.indirect_symbol _GFp_ia32cap_P
.long 0
#endif