# This file is generated from a similarly-named Perl script in the BoringSSL
# source tree. Do not edit by hand.
#
# NOTE(review): the annotations in this file were added during review. Since
# the file is generated, they would presumably be lost on regeneration and
# belong in the Perl source instead -- confirm before merging.
#
# Conventions visible in this file:
#   - AT&T operand order (source first, destination last).
#   - Arguments are read from the stack at 4(%esp), 8(%esp), ... on entry.
#   - Addresses of the constant tables are formed position-independently with
#     a call/pop pair followed by leal of a label difference.
#   - PCLMULQDQ, PSHUFB and PALIGNR are emitted as raw .byte sequences; each
#     one is decoded in a trailing comment.
#   - NOTE(review): .align operands look like power-of-two exponents here
#     (.align 6 precedes tables that are read with movdqa) -- TODO confirm for
#     the target assembler.

#if defined(__i386__)
#if defined(BORINGSSL_PREFIX)
#include <boringssl_prefix_symbols_asm.h>
#endif
.text

# ---------------------------------------------------------------------------
# _gcm_gmult_4bit_mmx(arg1, arg2)
#
#   arg1 -> %edi  16-byte block. It is read one byte at a time from offset 15
#                 down to offset 0 and then overwritten with the 16-byte
#                 result.
#   arg2 -> %esi  Table of 16-byte entries; the byte offset used is
#                 (4-bit value * 16).
#
# NOTE(review): by its name this is presumably the GHASH multiplication of the
# hash state by the key using a 4-bit table -- confirm against the C caller.
#
# Register roles:
#   %mm1:%mm0  128-bit accumulator (%mm1 is the upper half: its low 4 bits are
#              moved into the top of %mm0 on every 4-bit right shift)
#   %mm2       carries those 4 bits between the halves
#   %eax       run-time address of Lrem_4bit
#   %ebp       index of the next byte of arg1 to load; counts 14 down to -1
#   %ecx,%edx  table offsets derived from the current byte (low nibble * 16
#              and high nibble * 16)
#   %ebx       low 32 bits of %mm0; masked to 4 bits it indexes Lrem_4bit
#
# Saves and restores %ebp, %ebx, %esi, %edi. Clobbers %eax, %ecx, %edx,
# %mm0-%mm2 and flags. Executes emms before returning.
# ---------------------------------------------------------------------------
.globl _gcm_gmult_4bit_mmx
.private_extern _gcm_gmult_4bit_mmx
.align 4
_gcm_gmult_4bit_mmx:
L_gcm_gmult_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%edi                  # arg1 (4 pushes + return address = 20)
    movl 24(%esp),%esi                  # arg2
    call L000pic_point
L000pic_point:
    popl %eax                           # %eax = address of L000pic_point
    leal Lrem_4bit-L000pic_point(%eax),%eax
    # Prime the accumulator with the table entry selected by the low nibble of
    # byte 15; keep the high nibble offset in %edx for the first loop pass.
    movzbl 15(%edi),%ebx
    xorl %ecx,%ecx
    movl %ebx,%edx
    movb %dl,%cl
    movl $14,%ebp
    shlb $4,%cl                         # %ecx = low nibble * 16
    andl $240,%edx                      # %edx = high nibble * 16
    movq 8(%esi,%ecx,1),%mm0
    movq (%esi,%ecx,1),%mm1
    movd %mm0,%ebx
    jmp L001mmx_loop
.align 4,0x90
L001mmx_loop:
    # First half: shift the accumulator right by 4 bits, fold in the table
    # entry for the high nibble of the current byte (%edx) and the Lrem_4bit
    # entry for the 4 bits shifted out, and load the next byte into %cl.
    psrlq $4,%mm0
    andl $15,%ebx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0
    movb (%edi,%ebp,1),%cl
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    decl %ebp                           # sets SF for the js below
    movd %mm0,%ebx
    pxor (%esi,%edx,1),%mm1
    movl %ecx,%edx
    pxor %mm2,%mm0
    js L002mmx_break                    # byte 0 has just been loaded
    # Second half: same shift and fold for the low nibble of the new byte.
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    jmp L001mmx_loop
.align 4,0x90
L002mmx_break:
    # Low nibble of byte 0.
    shlb $4,%cl
    andl $15,%ebx
    psrlq $4,%mm0
    andl $240,%edx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%ecx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx
    pxor (%esi,%ecx,1),%mm1
    pxor %mm2,%mm0
    # High nibble of byte 0.
    psrlq $4,%mm0
    andl $15,%ebx
    movq %mm1,%mm2
    psrlq $4,%mm1
    pxor 8(%esi,%edx,1),%mm0
    psllq $60,%mm2
    pxor (%eax,%ebx,8),%mm1
    movd %mm0,%ebx                      # low dword of %mm0 (final: %mm2 only touches the top 4 bits)
    pxor (%esi,%edx,1),%mm1
    pxor %mm2,%mm0
    # Split the accumulator into four dwords, byte-swap each and store so that
    # the upper half (%mm1) lands at offsets 0..7 and the lower half at 8..15.
    psrlq $32,%mm0
    movd %mm1,%edx
    psrlq $32,%mm1
    movd %mm0,%ecx
    movd %mm1,%ebp
    bswap %ebx
    bswap %edx
    bswap %ecx
    bswap %ebp
    emms
    movl %ebx,12(%edi)
    movl %edx,4(%edi)
    movl %ecx,8(%edi)
    movl %ebp,(%edi)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret

# ---------------------------------------------------------------------------
# _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)
#
#   arg1 -> %eax  16-byte state; loaded before the outer loop and stored after
#                 it.
#   arg2 -> %ebx  Table of sixteen 16-byte entries (256 bytes are read).
#   arg3 -> %ecx  Input pointer, advanced by 16 per outer iteration.
#   arg4 -> %edx  Byte count; arg3 + arg4 is saved as the end pointer.
#
# NOTE(review): the outer loop tests for completion only at the bottom and
# only by pointer equality, so a count of 0 or one that is not a multiple of
# 16 is not handled here -- presumably the caller guarantees this; confirm.
#
# Stack frame (offsets from the re-aligned %esp):
#     0.. 15  16 bytes; byte i = (low byte of the dword at entry i + 8) << 4
#    16..143  qword at offset 8 of each table entry
#   144..271  qword at offset 0 of each table entry
#   272..399  lower qword of each entry shifted right by 4 bits as a 128-bit
#             value (low 4 bits of the upper qword shifted in at the top)
#   400..527  upper qword of each entry shifted right by 4 bits
#   528..539  state XOR input for the current block (bytes 0..11)
#   544       arg1
#   548       current input pointer
#   552       end pointer
#   556       %esp as it was after the four pushes
#
# %esi holds the run-time address of Lrem_8bit throughout.
# Saves and restores %ebp, %ebx, %esi, %edi. Clobbers %eax, %ecx, %edx,
# %mm0-%mm7 and flags. Executes emms before returning.
# ---------------------------------------------------------------------------
.globl _gcm_ghash_4bit_mmx
.private_extern _gcm_ghash_4bit_mmx
.align 4
_gcm_ghash_4bit_mmx:
L_gcm_ghash_4bit_mmx_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax
    movl 24(%esp),%ebx
    movl 28(%esp),%ecx
    movl 32(%esp),%edx
    movl %esp,%ebp                      # remember %esp for the epilogue
    call L003pic_point
L003pic_point:
    popl %esi
    leal Lrem_8bit-L003pic_point(%esi),%esi
    subl $544,%esp
    andl $-64,%esp
    subl $16,%esp
    addl %ecx,%edx                      # end pointer = input + count
    movl %eax,544(%esp)
    movl %edx,552(%esp)
    movl %ebp,556(%esp)
    addl $128,%ebx                      # bias so entries are reached at -128..+120
    leal 144(%esp),%edi
    leal 400(%esp),%ebp
    # Build the four 128-byte tables and the 16-byte table described in the
    # frame layout above. The work for consecutive entries is interleaved.
    movl -120(%ebx),%edx
    movq -120(%ebx),%mm0
    movq -128(%ebx),%mm3
    shll $4,%edx
    movb %dl,(%esp)
    movl -104(%ebx),%edx
    movq -104(%ebx),%mm2
    movq -112(%ebx),%mm5
    movq %mm0,-128(%edi)
    psrlq $4,%mm0
    movq %mm3,(%edi)
    movq %mm3,%mm7
    psrlq $4,%mm3
    shll $4,%edx
    movb %dl,1(%esp)
    movl -88(%ebx),%edx
    movq -88(%ebx),%mm1
    psllq $60,%mm7
    movq -96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-120(%edi)
    psrlq $4,%mm2
    movq %mm5,8(%edi)
    movq %mm5,%mm6
    movq %mm0,-128(%ebp)
    psrlq $4,%mm5
    movq %mm3,(%ebp)
    shll $4,%edx
    movb %dl,2(%esp)
    movl -72(%ebx),%edx
    movq -72(%ebx),%mm0
    psllq $60,%mm6
    movq -80(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-112(%edi)
    psrlq $4,%mm1
    movq %mm4,16(%edi)
    movq %mm4,%mm7
    movq %mm2,-120(%ebp)
    psrlq $4,%mm4
    movq %mm5,8(%ebp)
    shll $4,%edx
    movb %dl,3(%esp)
    movl -56(%ebx),%edx
    movq -56(%ebx),%mm2
    psllq $60,%mm7
    movq -64(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-104(%edi)
    psrlq $4,%mm0
    movq %mm3,24(%edi)
    movq %mm3,%mm6
    movq %mm1,-112(%ebp)
    psrlq $4,%mm3
    movq %mm4,16(%ebp)
    shll $4,%edx
    movb %dl,4(%esp)
    movl -40(%ebx),%edx
    movq -40(%ebx),%mm1
    psllq $60,%mm6
    movq -48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-96(%edi)
    psrlq $4,%mm2
    movq %mm5,32(%edi)
    movq %mm5,%mm7
    movq %mm0,-104(%ebp)
    psrlq $4,%mm5
    movq %mm3,24(%ebp)
    shll $4,%edx
    movb %dl,5(%esp)
    movl -24(%ebx),%edx
    movq -24(%ebx),%mm0
    psllq $60,%mm7
    movq -32(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-88(%edi)
    psrlq $4,%mm1
    movq %mm4,40(%edi)
    movq %mm4,%mm6
    movq %mm2,-96(%ebp)
    psrlq $4,%mm4
    movq %mm5,32(%ebp)
    shll $4,%edx
    movb %dl,6(%esp)
    movl -8(%ebx),%edx
    movq -8(%ebx),%mm2
    psllq $60,%mm6
    movq -16(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-80(%edi)
    psrlq $4,%mm0
    movq %mm3,48(%edi)
    movq %mm3,%mm7
    movq %mm1,-88(%ebp)
    psrlq $4,%mm3
    movq %mm4,40(%ebp)
    shll $4,%edx
    movb %dl,7(%esp)
    movl 8(%ebx),%edx
    movq 8(%ebx),%mm1
    psllq $60,%mm7
    movq (%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-72(%edi)
    psrlq $4,%mm2
    movq %mm5,56(%edi)
    movq %mm5,%mm6
    movq %mm0,-80(%ebp)
    psrlq $4,%mm5
    movq %mm3,48(%ebp)
    shll $4,%edx
    movb %dl,8(%esp)
    movl 24(%ebx),%edx
    movq 24(%ebx),%mm0
    psllq $60,%mm6
    movq 16(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-64(%edi)
    psrlq $4,%mm1
    movq %mm4,64(%edi)
    movq %mm4,%mm7
    movq %mm2,-72(%ebp)
    psrlq $4,%mm4
    movq %mm5,56(%ebp)
    shll $4,%edx
    movb %dl,9(%esp)
    movl 40(%ebx),%edx
    movq 40(%ebx),%mm2
    psllq $60,%mm7
    movq 32(%ebx),%mm5
    por %mm7,%mm1
    movq %mm0,-56(%edi)
    psrlq $4,%mm0
    movq %mm3,72(%edi)
    movq %mm3,%mm6
    movq %mm1,-64(%ebp)
    psrlq $4,%mm3
    movq %mm4,64(%ebp)
    shll $4,%edx
    movb %dl,10(%esp)
    movl 56(%ebx),%edx
    movq 56(%ebx),%mm1
    psllq $60,%mm6
    movq 48(%ebx),%mm4
    por %mm6,%mm0
    movq %mm2,-48(%edi)
    psrlq $4,%mm2
    movq %mm5,80(%edi)
    movq %mm5,%mm7
    movq %mm0,-56(%ebp)
    psrlq $4,%mm5
    movq %mm3,72(%ebp)
    shll $4,%edx
    movb %dl,11(%esp)
    movl 72(%ebx),%edx
    movq 72(%ebx),%mm0
    psllq $60,%mm7
    movq 64(%ebx),%mm3
    por %mm7,%mm2
    movq %mm1,-40(%edi)
    psrlq $4,%mm1
    movq %mm4,88(%edi)
    movq %mm4,%mm6
    movq %mm2,-48(%ebp)
    psrlq $4,%mm4
    movq %mm5,80(%ebp)
    shll $4,%edx
    movb %dl,12(%esp)
    movl 88(%ebx),%edx
    movq 88(%ebx),%mm2
    psllq $60,%mm6
    movq 80(%ebx),%mm5
    por %mm6,%mm1
    movq %mm0,-32(%edi)
    psrlq $4,%mm0
    movq %mm3,96(%edi)
    movq %mm3,%mm7
    movq %mm1,-40(%ebp)
    psrlq $4,%mm3
    movq %mm4,88(%ebp)
    shll $4,%edx
    movb %dl,13(%esp)
    movl 104(%ebx),%edx
    movq 104(%ebx),%mm1
    psllq $60,%mm7
    movq 96(%ebx),%mm4
    por %mm7,%mm0
    movq %mm2,-24(%edi)
    psrlq $4,%mm2
    movq %mm5,104(%edi)
    movq %mm5,%mm6
    movq %mm0,-32(%ebp)
    psrlq $4,%mm5
    movq %mm3,96(%ebp)
    shll $4,%edx
    movb %dl,14(%esp)
    movl 120(%ebx),%edx
    movq 120(%ebx),%mm0
    psllq $60,%mm6
    movq 112(%ebx),%mm3
    por %mm6,%mm2
    movq %mm1,-16(%edi)
    psrlq $4,%mm1
    movq %mm4,112(%edi)
    movq %mm4,%mm7
    movq %mm2,-24(%ebp)
    psrlq $4,%mm4
    movq %mm5,104(%ebp)
    shll $4,%edx
    movb %dl,15(%esp)
    psllq $60,%mm7
    por %mm7,%mm1
    movq %mm0,-8(%edi)
    psrlq $4,%mm0
    movq %mm3,120(%edi)
    movq %mm3,%mm6
    movq %mm1,-16(%ebp)
    psrlq $4,%mm3
    movq %mm4,112(%ebp)
    psllq $60,%mm6
    por %mm6,%mm0
    movq %mm0,-8(%ebp)
    movq %mm3,120(%ebp)
    # Load the state: bytes 0..7 in %mm6, 8..11 in %ebx, 12..15 in %edx.
    movq (%eax),%mm6
    movl 8(%eax),%ebx
    movl 12(%eax),%edx
.align 4,0x90
L004outer:
    # One 16-byte input block per iteration. The block is XORed into the
    # state, then consumed one byte at a time from byte 15 down to byte 0.
    # For each byte, %al takes the low nibble and %ebp or %edi (alternating)
    # the high nibble; %mm6:%mm7 is shifted right by 8 bits per step, and the
    # byte shifted out (in %bl or %cl, alternating) selects a 16-bit entry of
    # Lrem_8bit that is inserted into %mm0, %mm1 or %mm2 (rotating) and XORed
    # into %mm6 one step later.
    xorl 12(%ecx),%edx
    xorl 8(%ecx),%ebx
    pxor (%ecx),%mm6
    leal 16(%ecx),%ecx
    movl %ebx,536(%esp)
    movq %mm6,528(%esp)
    movl %ecx,548(%esp)
    # Byte 15: there is nothing to shift yet, so just load the table entry for
    # its low nibble.
    xorl %eax,%eax
    roll $8,%edx
    movb %dl,%al
    movl %eax,%ebp
    andb $15,%al
    shrl $4,%ebp
    pxor %mm0,%mm0
    roll $8,%edx
    pxor %mm1,%mm1
    pxor %mm2,%mm2
    movq 16(%esp,%eax,8),%mm7
    movq 144(%esp,%eax,8),%mm6
    # Fetch byte 14.
    movb %dl,%al
    movd %mm7,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%edi
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 13.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 12, then reload %edx with bytes 8..11 of the saved block.
    movb %dl,%al
    movl 536(%esp),%edx
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 11.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 10.
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 9.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 8, then reload %edx with bytes 4..7 of the saved block.
    movb %dl,%al
    movl 532(%esp),%edx
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 7.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 6.
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 5.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 4, then reload %edx with bytes 0..3 of the saved block.
    movb %dl,%al
    movl 528(%esp),%edx
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 3.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 2.
    movb %dl,%al
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm1,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm0
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    xorb (%esp,%ebp,1),%bl
    # Fetch byte 1.
    movb %dl,%al
    movd %mm7,%ecx
    movzbl %bl,%ebx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%ebp
    psrlq $8,%mm6
    pxor 272(%esp,%edi,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm0,%mm6
    shrl $4,%ebp
    pinsrw $2,(%esi,%ebx,2),%mm2
    pxor 16(%esp,%eax,8),%mm7
    roll $8,%edx
    pxor 144(%esp,%eax,8),%mm6
    pxor %mm3,%mm7
    pxor 400(%esp,%edi,8),%mm6
    xorb (%esp,%edi,1),%cl
    # Fetch byte 0. The value loaded from 524(%esp) is not used: %edx is not
    # read again before movd overwrites it below.
    movb %dl,%al
    movl 524(%esp),%edx
    movd %mm7,%ebx
    movzbl %cl,%ecx
    psrlq $8,%mm7
    movq %mm6,%mm3
    movl %eax,%edi
    psrlq $8,%mm6
    pxor 272(%esp,%ebp,8),%mm7
    andb $15,%al
    psllq $56,%mm3
    pxor %mm2,%mm6
    shrl $4,%edi
    pinsrw $2,(%esi,%ecx,2),%mm1
    pxor 16(%esp,%eax,8),%mm7
    pxor 144(%esp,%eax,8),%mm6
    xorb (%esp,%ebp,1),%bl
    pxor %mm3,%mm7
    pxor 400(%esp,%ebp,8),%mm6
    movzbl %bl,%ebx
    # Last step: the shift is 4 bits instead of 8, and the pending Lrem_8bit
    # contributions in %mm1, %mm0 and %mm2 are aligned with shifts of 4 and 12
    # and a word-3 insert before being folded into %mm6.
    pxor %mm2,%mm2
    psllq $4,%mm1
    movd %mm7,%ecx
    psrlq $4,%mm7
    movq %mm6,%mm3
    psrlq $4,%mm6
    shll $4,%ecx
    pxor 16(%esp,%edi,8),%mm7
    psllq $60,%mm3
    movzbl %cl,%ecx
    pxor %mm3,%mm7
    pxor 144(%esp,%edi,8),%mm6
    pinsrw $2,(%esi,%ebx,2),%mm0
    pxor %mm1,%mm6
    movd %mm7,%edx
    pinsrw $3,(%esi,%ecx,2),%mm2
    psllq $12,%mm0
    pxor %mm0,%mm6
    psrlq $32,%mm7
    pxor %mm2,%mm6
    movl 548(%esp),%ecx                 # input pointer for the next block
    movd %mm7,%ebx
    # Reverse the byte order of the result: swap bytes within each 16-bit word
    # of %mm6 and then reverse its four words; bswap the two dwords.
    movq %mm6,%mm3
    psllw $8,%mm6
    psrlw $8,%mm3
    por %mm3,%mm6
    bswap %edx
    pshufw $27,%mm6,%mm6
    bswap %ebx
    cmpl 552(%esp),%ecx
    jne L004outer
    movl 544(%esp),%eax
    movl %edx,12(%eax)
    movl %ebx,8(%eax)
    movq %mm6,(%eax)
    movl 556(%esp),%esp                 # drop the aligned frame
    emms
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret

# ---------------------------------------------------------------------------
# _gcm_init_clmul(arg1, arg2)
#
#   arg1 -> %edx  Output; 48 bytes are written (three 16-byte stores).
#   arg2 -> %eax  Input; 16 bytes are read.
#
# Steps, as performed below:
#   1. Load the input, swap its two qwords, and shift the 128-bit value left
#      by one bit. If the top bit was set before the shift, XOR in the 16
#      bytes at Lbswap+16.
#   2. Multiply the result by itself with three carry-less multiplies and
#      reduce the 256-bit product back to 128 bits.
#   3. Store the value from step 1 at offset 0, the value from step 2 at
#      offset 16, and at offset 32 the qword pair (hi XOR lo of the first
#      value, hi XOR lo of the second value).
# NOTE(review): presumably these are H, H squared and their Karatsuba helper
# terms for the two routines that follow -- confirm against the C caller.
#
# Uses no callee-saved registers and no stack beyond the call/pop pair.
# Clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and flags.
# ---------------------------------------------------------------------------
.globl _gcm_init_clmul
.private_extern _gcm_init_clmul
.align 4
_gcm_init_clmul:
L_gcm_init_clmul_begin:
    movl 4(%esp),%edx
    movl 8(%esp),%eax
    call L005pic
L005pic:
    popl %ecx
    leal Lbswap-L005pic(%ecx),%ecx      # %ecx = address of Lbswap
    movdqu (%eax),%xmm2
    pshufd $78,%xmm2,%xmm2              # swap the two qwords
    pshufd $255,%xmm2,%xmm4             # broadcast the top dword
    movdqa %xmm2,%xmm3
    psllq $1,%xmm2
    pxor %xmm5,%xmm5
    psrlq $63,%xmm3
    pcmpgtd %xmm4,%xmm5                 # all-ones when the top bit was set
    pslldq $8,%xmm3                     # carry from low qword into high qword
    por %xmm3,%xmm2
    pand 16(%ecx),%xmm5
    pxor %xmm5,%xmm2
    # Square the value: three carry-less multiplies on (lo, hi, lo XOR hi).
    movdqa %xmm2,%xmm0
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    # Reduction of %xmm1:%xmm0, first phase (left shifts by 5, 1 and 57).
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    # Reduction, second phase (right shifts by 1, 5 and 1); result in %xmm0.
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    pshufd $78,%xmm2,%xmm3
    pshufd $78,%xmm0,%xmm4
    pxor %xmm2,%xmm3
    movdqu %xmm2,(%edx)
    pxor %xmm0,%xmm4
    movdqu %xmm0,16(%edx)
.byte 102,15,58,15,227,8                # palignr $8,%xmm3,%xmm4
    movdqu %xmm4,32(%edx)
    ret

# ---------------------------------------------------------------------------
# _gcm_gmult_clmul(arg1, arg2)
#
#   arg1 -> %eax  16-byte block, read and then overwritten with the result.
#   arg2 -> %edx  Table; the 16 bytes at offset 0 and at offset 32 are read.
#                 NOTE(review): presumably the table written by
#                 _gcm_init_clmul -- confirm against the C caller.
#
# The block is byte-reversed with the Lbswap mask, multiplied by the value at
# table offset 0 using three carry-less multiplies, reduced to 128 bits,
# byte-reversed again and stored.
#
# Uses no callee-saved registers. Clobbers %eax, %ecx, %edx, %xmm0-%xmm5 and
# flags.
# ---------------------------------------------------------------------------
.globl _gcm_gmult_clmul
.private_extern _gcm_gmult_clmul
.align 4
_gcm_gmult_clmul:
L_gcm_gmult_clmul_begin:
    movl 4(%esp),%eax
    movl 8(%esp),%edx
    call L006pic
L006pic:
    popl %ecx
    leal Lbswap-L006pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5                 # byte-reversal mask
    movups (%edx),%xmm2
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movups 32(%edx),%xmm4
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pxor %xmm0,%xmm3
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    # Reduction of %xmm1:%xmm0, first phase.
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    # Reduction, second phase; result in %xmm0.
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    ret

# ---------------------------------------------------------------------------
# _gcm_ghash_clmul(arg1, arg2, arg3, arg4)
#
#   arg1 -> %eax  16-byte state, read at entry and written at exit.
#   arg2 -> %edx  Table; 16-byte values at offsets 0, 16 and 32 are read.
#   arg3 -> %esi  Input pointer.
#   arg4 -> %ebx  Byte count.
#
# Control flow:
#   - count == 16: jump straight to L008odd_tail (one block).
#   - otherwise two blocks are loaded up front and L010mod_loop consumes 32
#     bytes per iteration, overlapping the reduction of one pair with the
#     multiplies for the next pair.
#   - L009even_tail finishes the last pair; if exactly one block then
#     remains (%ebx == 0) it falls through into L008odd_tail.
#
# NOTE(review): a count of 0 is not handled: subl $16 leaves a nonzero value,
# so the two-block path is entered and reads input. Presumably the caller
# guarantees a nonzero multiple of 16 -- confirm.
#
# Saves and restores %ebp, %ebx, %esi, %edi. Clobbers %eax, %ecx, %edx,
# %xmm0-%xmm7 and flags.
# ---------------------------------------------------------------------------
.globl _gcm_ghash_clmul
.private_extern _gcm_ghash_clmul
.align 4
_gcm_ghash_clmul:
L_gcm_ghash_clmul_begin:
    pushl %ebp
    pushl %ebx
    pushl %esi
    pushl %edi
    movl 20(%esp),%eax
    movl 24(%esp),%edx
    movl 28(%esp),%esi
    movl 32(%esp),%ebx
    call L007pic
L007pic:
    popl %ecx
    leal Lbswap-L007pic(%ecx),%ecx
    movdqu (%eax),%xmm0
    movdqa (%ecx),%xmm5                 # byte-reversal mask
    movdqu (%edx),%xmm2                 # table value at offset 0
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    subl $16,%ebx
    jz L008odd_tail
    # Load two blocks, byte-reverse them, fold the first into the state and
    # start the three multiplies for the second.
    movdqu (%esi),%xmm3
    movdqu 16(%esi),%xmm6
.byte 102,15,56,0,221                   # pshufb %xmm5,%xmm3
.byte 102,15,56,0,245                   # pshufb %xmm5,%xmm6
    movdqu 32(%edx),%xmm5
    pxor %xmm3,%xmm0
    pshufd $78,%xmm6,%xmm3
    movdqa %xmm6,%xmm7
    pxor %xmm6,%xmm3
    leal 32(%esi),%esi
.byte 102,15,58,68,242,0                # pclmulqdq $0x00,%xmm2,%xmm6
.byte 102,15,58,68,250,17               # pclmulqdq $0x11,%xmm2,%xmm7
.byte 102,15,58,68,221,0                # pclmulqdq $0x00,%xmm5,%xmm3
    movups 16(%edx),%xmm2               # table value at offset 16
    nop
    subl $32,%ebx
    jbe L009even_tail
    jmp L010mod_loop
.align 5,0x90
L010mod_loop:
    # Multiply the running state by the table value at offset 16 and combine
    # it with the products already computed for the second block of the pair.
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
    nop
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16               # pclmulqdq $0x10,%xmm5,%xmm4
    movups (%edx),%xmm2
    xorps %xmm6,%xmm0
    movdqa (%ecx),%xmm5
    xorps %xmm7,%xmm1
    movdqu (%esi),%xmm7                 # next pair, first block
    pxor %xmm0,%xmm3
    movdqu 16(%esi),%xmm6               # next pair, second block
    pxor %xmm1,%xmm3
.byte 102,15,56,0,253                   # pshufb %xmm5,%xmm7
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
.byte 102,15,56,0,245                   # pshufb %xmm5,%xmm6
    pxor %xmm7,%xmm1                    # fold the next first block into the upper half
    movdqa %xmm6,%xmm7
    # Reduction, first phase, interleaved with the next multiplies.
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
.byte 102,15,58,68,242,0                # pclmulqdq $0x00,%xmm2,%xmm6
    movups 32(%edx),%xmm5
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    pshufd $78,%xmm7,%xmm3
    # Reduction, second phase.
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm7,%xmm3
    pxor %xmm4,%xmm1
.byte 102,15,58,68,250,17               # pclmulqdq $0x11,%xmm2,%xmm7
    movups 16(%edx),%xmm2
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
.byte 102,15,58,68,221,0                # pclmulqdq $0x00,%xmm5,%xmm3
    leal 32(%esi),%esi
    subl $32,%ebx
    ja L010mod_loop
L009even_tail:
    # Finish the pending pair without loading any further input.
    pshufd $78,%xmm0,%xmm4
    movdqa %xmm0,%xmm1
    pxor %xmm0,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,229,16               # pclmulqdq $0x10,%xmm5,%xmm4
    movdqa (%ecx),%xmm5
    xorps %xmm6,%xmm0
    xorps %xmm7,%xmm1
    pxor %xmm0,%xmm3
    pxor %xmm1,%xmm3
    pxor %xmm3,%xmm4
    movdqa %xmm4,%xmm3
    psrldq $8,%xmm4
    pslldq $8,%xmm3
    pxor %xmm4,%xmm1
    pxor %xmm3,%xmm0
    # Reduction, first phase.
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    # Reduction, second phase.
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
    testl %ebx,%ebx
    jnz L011done                        # nonzero here: no block is left over
    movups (%edx),%xmm2                 # table value at offset 0 for the last block
L008odd_tail:
    # Single block: fold it into the state and multiply by the table value at
    # offset 0 (in %xmm2).
    movdqu (%esi),%xmm3
.byte 102,15,56,0,221                   # pshufb %xmm5,%xmm3
    pxor %xmm3,%xmm0
    movdqa %xmm0,%xmm1
    pshufd $78,%xmm0,%xmm3
    pshufd $78,%xmm2,%xmm4
    pxor %xmm0,%xmm3
    pxor %xmm2,%xmm4
.byte 102,15,58,68,194,0                # pclmulqdq $0x00,%xmm2,%xmm0
.byte 102,15,58,68,202,17               # pclmulqdq $0x11,%xmm2,%xmm1
.byte 102,15,58,68,220,0                # pclmulqdq $0x00,%xmm4,%xmm3
    xorps %xmm0,%xmm3
    xorps %xmm1,%xmm3
    movdqa %xmm3,%xmm4
    psrldq $8,%xmm3
    pslldq $8,%xmm4
    pxor %xmm3,%xmm1
    pxor %xmm4,%xmm0
    # Reduction, first phase.
    movdqa %xmm0,%xmm4
    movdqa %xmm0,%xmm3
    psllq $5,%xmm0
    pxor %xmm0,%xmm3
    psllq $1,%xmm0
    pxor %xmm3,%xmm0
    psllq $57,%xmm0
    movdqa %xmm0,%xmm3
    pslldq $8,%xmm0
    psrldq $8,%xmm3
    pxor %xmm4,%xmm0
    pxor %xmm3,%xmm1
    # Reduction, second phase.
    movdqa %xmm0,%xmm4
    psrlq $1,%xmm0
    pxor %xmm4,%xmm1
    pxor %xmm0,%xmm4
    psrlq $5,%xmm0
    pxor %xmm4,%xmm0
    psrlq $1,%xmm0
    pxor %xmm1,%xmm0
L011done:
.byte 102,15,56,0,197                   # pshufb %xmm5,%xmm0
    movdqu %xmm0,(%eax)
    popl %edi
    popl %esi
    popl %ebx
    popl %ebp
    ret

# Constants used by the routines above. They live in the same section as the
# code and are reached through the call/pop address computation.
#
# Lbswap: 32 bytes.
#   +0   pshufb mask 15,14,...,0, which reverses the 16 bytes of a register.
#   +16  16-byte constant (bytes 1,0,...,0,194) that _gcm_init_clmul ANDs with
#        its top-bit mask.
.align 6,0x90
Lbswap:
.byte 15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
.byte 1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
# Lrem_8bit: 256 entries of 16 bits each, indexed by a byte value in
# _gcm_ghash_4bit_mmx (scale 2 off %esi).
.align 6,0x90
Lrem_8bit:
.value 0,450,900,582,1800,1738,1164,1358
.value 3600,4050,3476,3158,2328,2266,2716,2910
.value 7200,7650,8100,7782,6952,6890,6316,6510
.value 4656,5106,4532,4214,5432,5370,5820,6014
.value 14400,14722,15300,14854,16200,16010,15564,15630
.value 13904,14226,13780,13334,12632,12442,13020,13086
.value 9312,9634,10212,9766,9064,8874,8428,8494
.value 10864,11186,10740,10294,11640,11450,12028,12094
.value 28800,28994,29444,29382,30600,30282,29708,30158
.value 32400,32594,32020,31958,31128,30810,31260,31710
.value 27808,28002,28452,28390,27560,27242,26668,27118
.value 25264,25458,24884,24822,26040,25722,26172,26622
.value 18624,18690,19268,19078,20424,19978,19532,19854
.value 18128,18194,17748,17558,16856,16410,16988,17310
.value 21728,21794,22372,22182,21480,21034,20588,20910
.value 23280,23346,22900,22710,24056,23610,24188,24510
.value 57600,57538,57988,58182,58888,59338,58764,58446
.value 61200,61138,60564,60758,59416,59866,60316,59998
.value 64800,64738,65188,65382,64040,64490,63916,63598
.value 62256,62194,61620,61814,62520,62970,63420,63102
.value 55616,55426,56004,56070,56904,57226,56780,56334
.value 55120,54930,54484,54550,53336,53658,54236,53790
.value 50528,50338,50916,50982,49768,50090,49644,49198
.value 52080,51890,51444,51510,52344,52666,53244,52798
.value 37248,36930,37380,37830,38536,38730,38156,38094
.value 40848,40530,39956,40406,39064,39258,39708,39646
.value 36256,35938,36388,36838,35496,35690,35116,35054
.value 33712,33394,32820,33270,33976,34170,34620,34558
.value 43456,43010,43588,43910,44744,44810,44364,44174
.value 42960,42514,42068,42390,41176,41242,41820,41630
.value 46560,46114,46692,47014,45800,45866,45420,45230
.value 48112,47666,47220,47542,48376,48442,49020,48830
# Lrem_4bit: 16 entries of 8 bytes each (low dword 0, value in the high
# dword), indexed by a 4-bit value in _gcm_gmult_4bit_mmx (scale 8 off %eax).
.align 6,0x90
Lrem_4bit:
.long 0,0,0,471859200,0,943718400,0,610271232
.long 0,1887436800,0,1822425088,0,1220542464,0,1423966208
.long 0,3774873600,0,4246732800,0,3644850176,0,3311403008
.long 0,2441084928,0,2376073216,0,2847932416,0,3051356160
# NUL-terminated ASCII string:
#   GHASH for x86, CRYPTOGAMS by <appro@openssl.org>
.byte 71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
.byte 82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
.byte 112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
.byte 0
#endif