%ifidn __OUTPUT_FORMAT__,obj
section	code	use32 class=code align=64
%elifidn __OUTPUT_FORMAT__,win32
%ifdef __YASM_VERSION_ID__
%if __YASM_VERSION_ID__ < 01010000h
%error yasm version 1.1.0 or later needed.
%endif
; Yasm automatically includes .00 and complains about redefining it.
; https://www.tortall.net/projects/yasm/manual/html/objfmt-win32-safeseh.html
%else
$@feat.00 equ 1
%endif
section	.text	code align=64
%else
section	.text	code
%endif

;=======================================================================
; Syntax : NASM / Yasm, 32-bit x86.
; ABI    : every routine reads its arguments from the stack and ends in
;          a plain `ret` (caller cleans up).  ebp/ebx/esi/edi are saved
;          and restored wherever they are modified.
; Tables : L$bswap, L$rem_8bit and L$rem_4bit live in this same section
;          and are located with a call/pop sequence plus a label
;          difference, so no absolute relocation is needed.
;=======================================================================

;-----------------------------------------------------------------------
; _gcm_gmult_4bit_mmx(arg1, arg2)
;   arg1 [esp+4]  -> edi : 16-byte buffer.  Read one byte at a time from
;                          offset 15 down to 0, then overwritten with
;                          the 16-byte result.
;   arg2 [esp+8]  -> esi : table addressed as nibble*16, with one qword
;                          at +0 and one at +8 of each entry.
;   NOTE(review): presumably arg1 is the GHASH accumulator (Xi) and arg2
;   the 4-bit multiplication table (Htable) - confirm against the C
;   prototype.
;   Uses mm0-mm2; executes emms before returning.
;-----------------------------------------------------------------------
global	_gcm_gmult_4bit_mmx
align	16
_gcm_gmult_4bit_mmx:
L$_gcm_gmult_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     edi,DWORD [20+esp]              ; arg1 (four pushes + return address)
        mov     esi,DWORD [24+esp]              ; arg2
        call    L$000pic_point                  ; call/pop: fetch current EIP
L$000pic_point:
        pop     eax
        lea     eax,[(L$rem_4bit-L$000pic_point)+eax]   ; eax = &L$rem_4bit
        movzx   ebx,BYTE [15+edi]               ; start with the last byte of the buffer
        xor     ecx,ecx
        mov     edx,ebx
        mov     cl,dl
        mov     ebp,14                          ; index of the next byte to fetch
        shl     cl,4                            ; ecx = low nibble  * 16
        and     edx,240                         ; edx = high nibble * 16
        movq    mm0,[8+ecx*1+esi]
        movq    mm1,[ecx*1+esi]
        movd    ebx,mm0
        jmp     NEAR L$001mmx_loop              ; hop over the alignment padding
align	16
L$001mmx_loop:
        ; First half-step: shift the 128-bit value mm1:mm0 right by 4,
        ; fold in the L$rem_4bit entry selected by the 4 bits shifted
        ; out, and add the table entry for the pending high nibble.
        psrlq   mm0,4
        and     ebx,15
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+edx*1+esi]
        mov     cl,BYTE [ebp*1+edi]             ; fetch next input byte
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        dec     ebp                             ; sets SF once byte 0 has been fetched
        movd    ebx,mm0
        pxor    mm1,[edx*1+esi]
        mov     edx,ecx
        pxor    mm0,mm2
        js      NEAR L$002mmx_break             ; flags still come from `dec ebp`
        ; Second half-step: same operation for the low nibble.
        shl     cl,4
        and     ebx,15
        psrlq   mm0,4
        and     edx,240
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+ecx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[ecx*1+esi]
        pxor    mm0,mm2
        jmp     NEAR L$001mmx_loop
align	16
L$002mmx_break:
        ; Two final half-steps for the byte fetched last (low nibble,
        ; then high nibble).
        shl     cl,4
        and     ebx,15
        psrlq   mm0,4
        and     edx,240
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+ecx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[ecx*1+esi]
        pxor    mm0,mm2

        psrlq   mm0,4
        and     ebx,15
        movq    mm2,mm1
        psrlq   mm1,4
        pxor    mm0,[8+edx*1+esi]
        psllq   mm2,60
        pxor    mm1,[ebx*8+eax]
        movd    ebx,mm0
        pxor    mm1,[edx*1+esi]
        pxor    mm0,mm2

        ; Split mm1:mm0 into four dwords, byte-swap each and store.
        psrlq   mm0,32
        movd    edx,mm1
        psrlq   mm1,32
        movd    ecx,mm0
        movd    ebp,mm1
        bswap   ebx
        bswap   edx
        bswap   ecx
        bswap   ebp
        emms                                    ; leave MMX state
        mov     DWORD [12+edi],ebx
        mov     DWORD [4+edi],edx
        mov     DWORD [8+edi],ecx
        mov     DWORD [edi],ebp
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _gcm_ghash_4bit_mmx(arg1, arg2, arg3, arg4)
;   arg1 [esp+4]  -> eax : 16-byte state, loaded before the loop and
;                          stored back after it.
;   arg2 [esp+8]  -> ebx : 256-byte table (16 entries of two qwords).
;   arg3 [esp+12] -> ecx : input pointer, advanced 16 bytes per pass.
;   arg4 [esp+16] -> edx : byte count; arg3+arg4 is the end pointer.
;   NOTE(review): the loop is bottom-tested with `jne`, so the byte
;   count is presumably required to be a non-zero multiple of 16 -
;   confirm with the callers.
;
;   Stack frame (esp is re-aligned to 64 and then lowered by 16):
;     [esp+0   .. 15 ] : per entry i, low byte of ((dword at +8 of
;                        table entry i) << 4)
;     [esp+16  .. 143] : the +8 qword of each table entry
;     [esp+144 .. 271] : the +0 qword of each table entry
;     [esp+272 .. 399] : the +8 qwords shifted right by 4, with the low
;                        4 bits of the matching +0 qword shifted in
;     [esp+400 .. 527] : the +0 qwords shifted right by 4
;     [esp+528 .. 539] : spill of the first 12 state bytes for this pass
;     [esp+544] arg1   [esp+548] current input pointer
;     [esp+552] end pointer   [esp+556] caller's esp
;   Uses mm0-mm7; executes emms before returning.
;-----------------------------------------------------------------------
global	_gcm_ghash_4bit_mmx
align	16
_gcm_ghash_4bit_mmx:
L$_gcm_ghash_4bit_mmx_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax,DWORD [20+esp]              ; arg1
        mov     ebx,DWORD [24+esp]              ; arg2
        mov     ecx,DWORD [28+esp]              ; arg3
        mov     edx,DWORD [32+esp]              ; arg4
        mov     ebp,esp                         ; remember esp for the epilogue
        call    L$003pic_point
L$003pic_point:
        pop     esi
        lea     esi,[(L$rem_8bit-L$003pic_point)+esi]   ; esi = &L$rem_8bit
        sub     esp,544
        and     esp,-64
        sub     esp,16
        add     edx,ecx                         ; edx = end of input
        mov     DWORD [544+esp],eax
        mov     DWORD [552+esp],edx
        mov     DWORD [556+esp],ebp
        add     ebx,128                         ; bias so all 16 entries fit disp8
        lea     edi,[144+esp]
        lea     ebp,[400+esp]

        ; ---- Build the on-stack tables.  Straight-line and software-
        ; ---- pipelined: the stores for one entry are interleaved with
        ; ---- the loads of the following entries.
        mov     edx,DWORD [ebx-120]             ; entry 0
        movq    mm0,[ebx-120]
        movq    mm3,[ebx-128]
        shl     edx,4
        mov     BYTE [esp],dl
        mov     edx,DWORD [ebx-104]             ; entry 1
        movq    mm2,[ebx-104]
        movq    mm5,[ebx-112]
        movq    [edi-128],mm0
        psrlq   mm0,4
        movq    [edi],mm3
        movq    mm7,mm3
        psrlq   mm3,4
        shl     edx,4
        mov     BYTE [1+esp],dl
        mov     edx,DWORD [ebx-88]              ; entry 2
        movq    mm1,[ebx-88]
        psllq   mm7,60
        movq    mm4,[ebx-96]
        por     mm0,mm7
        movq    [edi-120],mm2
        psrlq   mm2,4
        movq    [8+edi],mm5
        movq    mm6,mm5
        movq    [ebp-128],mm0
        psrlq   mm5,4
        movq    [ebp],mm3
        shl     edx,4
        mov     BYTE [2+esp],dl
        mov     edx,DWORD [ebx-72]              ; entry 3
        movq    mm0,[ebx-72]
        psllq   mm6,60
        movq    mm3,[ebx-80]
        por     mm2,mm6
        movq    [edi-112],mm1
        psrlq   mm1,4
        movq    [16+edi],mm4
        movq    mm7,mm4
        movq    [ebp-120],mm2
        psrlq   mm4,4
        movq    [8+ebp],mm5
        shl     edx,4
        mov     BYTE [3+esp],dl
        mov     edx,DWORD [ebx-56]              ; entry 4
        movq    mm2,[ebx-56]
        psllq   mm7,60
        movq    mm5,[ebx-64]
        por     mm1,mm7
        movq    [edi-104],mm0
        psrlq   mm0,4
        movq    [24+edi],mm3
        movq    mm6,mm3
        movq    [ebp-112],mm1
        psrlq   mm3,4
        movq    [16+ebp],mm4
        shl     edx,4
        mov     BYTE [4+esp],dl
        mov     edx,DWORD [ebx-40]              ; entry 5
        movq    mm1,[ebx-40]
        psllq   mm6,60
        movq    mm4,[ebx-48]
        por     mm0,mm6
        movq    [edi-96],mm2
        psrlq   mm2,4
        movq    [32+edi],mm5
        movq    mm7,mm5
        movq    [ebp-104],mm0
        psrlq   mm5,4
        movq    [24+ebp],mm3
        shl     edx,4
        mov     BYTE [5+esp],dl
        mov     edx,DWORD [ebx-24]              ; entry 6
        movq    mm0,[ebx-24]
        psllq   mm7,60
        movq    mm3,[ebx-32]
        por     mm2,mm7
        movq    [edi-88],mm1
        psrlq   mm1,4
        movq    [40+edi],mm4
        movq    mm6,mm4
        movq    [ebp-96],mm2
        psrlq   mm4,4
        movq    [32+ebp],mm5
        shl     edx,4
        mov     BYTE [6+esp],dl
        mov     edx,DWORD [ebx-8]               ; entry 7
        movq    mm2,[ebx-8]
        psllq   mm6,60
        movq    mm5,[ebx-16]
        por     mm1,mm6
        movq    [edi-80],mm0
        psrlq   mm0,4
        movq    [48+edi],mm3
        movq    mm7,mm3
        movq    [ebp-88],mm1
        psrlq   mm3,4
        movq    [40+ebp],mm4
        shl     edx,4
        mov     BYTE [7+esp],dl
        mov     edx,DWORD [8+ebx]               ; entry 8
        movq    mm1,[8+ebx]
        psllq   mm7,60
        movq    mm4,[ebx]
        por     mm0,mm7
        movq    [edi-72],mm2
        psrlq   mm2,4
        movq    [56+edi],mm5
        movq    mm6,mm5
        movq    [ebp-80],mm0
        psrlq   mm5,4
        movq    [48+ebp],mm3
        shl     edx,4
        mov     BYTE [8+esp],dl
        mov     edx,DWORD [24+ebx]              ; entry 9
        movq    mm0,[24+ebx]
        psllq   mm6,60
        movq    mm3,[16+ebx]
        por     mm2,mm6
        movq    [edi-64],mm1
        psrlq   mm1,4
        movq    [64+edi],mm4
        movq    mm7,mm4
        movq    [ebp-72],mm2
        psrlq   mm4,4
        movq    [56+ebp],mm5
        shl     edx,4
        mov     BYTE [9+esp],dl
        mov     edx,DWORD [40+ebx]              ; entry 10
        movq    mm2,[40+ebx]
        psllq   mm7,60
        movq    mm5,[32+ebx]
        por     mm1,mm7
        movq    [edi-56],mm0
        psrlq   mm0,4
        movq    [72+edi],mm3
        movq    mm6,mm3
        movq    [ebp-64],mm1
        psrlq   mm3,4
        movq    [64+ebp],mm4
        shl     edx,4
        mov     BYTE [10+esp],dl
        mov     edx,DWORD [56+ebx]              ; entry 11
        movq    mm1,[56+ebx]
        psllq   mm6,60
        movq    mm4,[48+ebx]
        por     mm0,mm6
        movq    [edi-48],mm2
        psrlq   mm2,4
        movq    [80+edi],mm5
        movq    mm7,mm5
        movq    [ebp-56],mm0
        psrlq   mm5,4
        movq    [72+ebp],mm3
        shl     edx,4
        mov     BYTE [11+esp],dl
        mov     edx,DWORD [72+ebx]              ; entry 12
        movq    mm0,[72+ebx]
        psllq   mm7,60
        movq    mm3,[64+ebx]
        por     mm2,mm7
        movq    [edi-40],mm1
        psrlq   mm1,4
        movq    [88+edi],mm4
        movq    mm6,mm4
        movq    [ebp-48],mm2
        psrlq   mm4,4
        movq    [80+ebp],mm5
        shl     edx,4
        mov     BYTE [12+esp],dl
        mov     edx,DWORD [88+ebx]              ; entry 13
        movq    mm2,[88+ebx]
        psllq   mm6,60
        movq    mm5,[80+ebx]
        por     mm1,mm6
        movq    [edi-32],mm0
        psrlq   mm0,4
        movq    [96+edi],mm3
        movq    mm7,mm3
        movq    [ebp-40],mm1
        psrlq   mm3,4
        movq    [88+ebp],mm4
        shl     edx,4
        mov     BYTE [13+esp],dl
        mov     edx,DWORD [104+ebx]             ; entry 14
        movq    mm1,[104+ebx]
        psllq   mm7,60
        movq    mm4,[96+ebx]
        por     mm0,mm7
        movq    [edi-24],mm2
        psrlq   mm2,4
        movq    [104+edi],mm5
        movq    mm6,mm5
        movq    [ebp-32],mm0
        psrlq   mm5,4
        movq    [96+ebp],mm3
        shl     edx,4
        mov     BYTE [14+esp],dl
        mov     edx,DWORD [120+ebx]             ; entry 15
        movq    mm0,[120+ebx]
        psllq   mm6,60
        movq    mm3,[112+ebx]
        por     mm2,mm6
        movq    [edi-16],mm1
        psrlq   mm1,4
        movq    [112+edi],mm4
        movq    mm7,mm4
        movq    [ebp-24],mm2
        psrlq   mm4,4
        movq    [104+ebp],mm5
        shl     edx,4
        mov     BYTE [15+esp],dl
        psllq   mm7,60                          ; drain the pipeline: entries 14 and 15
        por     mm1,mm7
        movq    [edi-8],mm0
        psrlq   mm0,4
        movq    [120+edi],mm3
        movq    mm6,mm3
        movq    [ebp-16],mm1
        psrlq   mm3,4
        movq    [112+ebp],mm4
        psllq   mm6,60
        por     mm0,mm6
        movq    [ebp-8],mm0
        movq    [120+ebp],mm3

        ; ---- Load the 16-byte state: bytes 0-7 in mm6, 8-11 in ebx,
        ; ---- 12-15 in edx.
        movq    mm6,[eax]
        mov     ebx,DWORD [8+eax]
        mov     edx,DWORD [12+eax]
align	16
L$004outer:
        ; XOR the next 16 input bytes into the state, spill the first
        ; 12 bytes to [528+esp..539+esp] and advance the input pointer.
        xor     edx,DWORD [12+ecx]
        xor     ebx,DWORD [8+ecx]
        pxor    mm6,[ecx]
        lea     ecx,[16+ecx]
        mov     DWORD [536+esp],ebx
        movq    [528+esp],mm6
        mov     DWORD [548+esp],ecx

        ; The state is consumed one byte per round, beginning with
        ; byte 15 (`rol edx,8` brings the next byte into dl).  Each
        ; round shifts mm6:mm7 right by 8, adds table entries for the
        ; two nibbles and queues an L$rem_8bit word (pinsrw), which is
        ; folded into mm6 a few rounds later.
        xor     eax,eax
        rol     edx,8
        mov     al,dl
        mov     ebp,eax
        and     al,15
        shr     ebp,4
        pxor    mm0,mm0
        rol     edx,8
        pxor    mm1,mm1
        pxor    mm2,mm2
        movq    mm7,[16+eax*8+esp]
        movq    mm6,[144+eax*8+esp]

        mov     al,dl
        movd    ebx,mm7
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        shr     edi,4
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        mov     edx,DWORD [536+esp]             ; reload state bytes 8-11
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     ebp,4
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     edi,4
        pinsrw  mm2,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     ebp,4
        pinsrw  mm1,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        mov     edx,DWORD [532+esp]             ; reload state bytes 4-7
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     edi,4
        pinsrw  mm0,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     ebp,4
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        mov     edx,DWORD [528+esp]             ; reload state bytes 0-3
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     edi,4
        pinsrw  mm2,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     ebp,4
        pinsrw  mm1,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm1
        shr     edi,4
        pinsrw  mm0,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        xor     bl,BYTE [ebp*1+esp]

        mov     al,dl
        movd    ecx,mm7
        movzx   ebx,bl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     ebp,eax
        psrlq   mm6,8
        pxor    mm7,[272+edi*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm0
        shr     ebp,4
        pinsrw  mm2,WORD [ebx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        rol     edx,8
        pxor    mm6,[144+eax*8+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+edi*8+esp]
        xor     cl,BYTE [edi*1+esp]

        mov     al,dl
        mov     edx,DWORD [524+esp]             ; value is never read: edx is overwritten by `movd edx,mm7` below
        movd    ebx,mm7
        movzx   ecx,cl
        psrlq   mm7,8
        movq    mm3,mm6
        mov     edi,eax
        psrlq   mm6,8
        pxor    mm7,[272+ebp*8+esp]
        and     al,15
        psllq   mm3,56
        pxor    mm6,mm2
        shr     edi,4
        pinsrw  mm1,WORD [ecx*2+esi],2
        pxor    mm7,[16+eax*8+esp]
        pxor    mm6,[144+eax*8+esp]
        xor     bl,BYTE [ebp*1+esp]
        pxor    mm7,mm3
        pxor    mm6,[400+ebp*8+esp]
        movzx   ebx,bl

        ; Final step of the pass: a 4-bit shift for the last nibble,
        ; then fold in the remainder words that are still pending.
        pxor    mm2,mm2
        psllq   mm1,4
        movd    ecx,mm7
        psrlq   mm7,4
        movq    mm3,mm6
        psrlq   mm6,4
        shl     ecx,4
        pxor    mm7,[16+edi*8+esp]
        psllq   mm3,60
        movzx   ecx,cl
        pxor    mm7,mm3
        pxor    mm6,[144+edi*8+esp]
        pinsrw  mm0,WORD [ebx*2+esi],2
        pxor    mm6,mm1
        movd    edx,mm7
        pinsrw  mm2,WORD [ecx*2+esi],3
        psllq   mm0,12
        pxor    mm6,mm0
        psrlq   mm7,32
        pxor    mm6,mm2
        mov     ecx,DWORD [548+esp]             ; current input pointer
        movd    ebx,mm7

        ; Reverse the byte order: bswap for edx/ebx; swap the bytes in
        ; each word and then reverse the four words for mm6.
        movq    mm3,mm6
        psllw   mm6,8
        psrlw   mm3,8
        por     mm6,mm3
        bswap   edx
        pshufw  mm6,mm6,27
        bswap   ebx
        cmp     ecx,DWORD [552+esp]             ; reached the end pointer?
        jne     NEAR L$004outer

        mov     eax,DWORD [544+esp]             ; arg1
        mov     DWORD [12+eax],edx
        mov     DWORD [8+eax],ebx
        movq    [eax],mm6
        mov     esp,DWORD [556+esp]             ; back to the caller's frame
        emms
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; _gcm_init_clmul(arg1, arg2)
;   arg1 [esp+4] -> edx : output, 48 bytes written (offsets 0, 16, 32).
;   arg2 [esp+8] -> eax : 16-byte input, read unaligned.
;   NOTE(review): presumably arg2 is the hash key H and the output holds
;   H, H^2 and their folded halves for Karatsuba - confirm against the
;   C-side table layout.
;   Leaf routine; modifies only ecx, xmm0-xmm5 and flags.
;   The `db` sequences are hand-encoded PCLMULQDQ / PALIGNR.
;-----------------------------------------------------------------------
global	_gcm_init_clmul
align	16
_gcm_init_clmul:
L$_gcm_init_clmul_begin:
        mov     edx,DWORD [4+esp]
        mov     eax,DWORD [8+esp]
        call    L$005pic
L$005pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$005pic)+ecx]    ; ecx = &L$bswap
        movdqu  xmm2,[eax]
        pshufd  xmm2,xmm2,78                    ; swap the two qwords
        ; Shift the 128-bit value left by one bit; when the top bit was
        ; set, XOR in the constant stored at L$bswap+16.
        pshufd  xmm4,xmm2,255                   ; broadcast the top dword
        movdqa  xmm3,xmm2
        psllq   xmm2,1
        pxor    xmm5,xmm5
        psrlq   xmm3,63
        pcmpgtd xmm5,xmm4                       ; all-ones if the top dword is negative
        pslldq  xmm3,8
        por     xmm2,xmm3
        pand    xmm5,[16+ecx]
        pxor    xmm2,xmm5
        ; Square the value: xmm1:xmm0 = xmm2 * xmm2 (carry-less), via
        ; three PCLMULQDQ multiplications.
        movdqa  xmm0,xmm2
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pshufd  xmm4,xmm2,78
        pxor    xmm3,xmm0
        pxor    xmm4,xmm2
        db      102,15,58,68,194,0              ; pclmulqdq xmm0,xmm2,0x00
        db      102,15,58,68,202,17             ; pclmulqdq xmm1,xmm2,0x11
        db      102,15,58,68,220,0              ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        ; Reduce the 256-bit product xmm1:xmm0 to 128 bits in xmm0
        ; (first phase: left shifts; second phase: right shifts).
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        ; Store both values and a third block built from their
        ; XOR-folded halves.
        pshufd  xmm3,xmm2,78
        pshufd  xmm4,xmm0,78
        pxor    xmm3,xmm2
        movdqu  [edx],xmm2
        pxor    xmm4,xmm0
        movdqu  [16+edx],xmm0
        db      102,15,58,15,227,8              ; palignr xmm4,xmm3,8
        movdqu  [32+edx],xmm4
        ret

;-----------------------------------------------------------------------
; _gcm_gmult_clmul(arg1, arg2)
;   arg1 [esp+4] -> eax : 16-byte value, byte-reversed on load and on
;                         store; updated in place.
;   arg2 [esp+8] -> edx : table; offsets 0 and 32 are read.
;   NOTE(review): presumably Xi and the table written by
;   _gcm_init_clmul - confirm against the C prototype.
;   Leaf routine; modifies only ecx, xmm0-xmm5 and flags.
;-----------------------------------------------------------------------
global	_gcm_gmult_clmul
align	16
_gcm_gmult_clmul:
L$_gcm_gmult_clmul_begin:
        mov     eax,DWORD [4+esp]
        mov     edx,DWORD [8+esp]
        call    L$006pic
L$006pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$006pic)+ecx]    ; ecx = &L$bswap
        movdqu  xmm0,[eax]
        movdqa  xmm5,[ecx]                      ; byte-reversal mask (L$bswap is 64-aligned)
        movups  xmm2,[edx]
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        movups  xmm4,[32+edx]
        ; xmm1:xmm0 = xmm0 * xmm2 (carry-less).
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pxor    xmm3,xmm0
        db      102,15,58,68,194,0              ; pclmulqdq xmm0,xmm2,0x00
        db      102,15,58,68,202,17             ; pclmulqdq xmm1,xmm2,0x11
        db      102,15,58,68,220,0              ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        ; Reduce xmm1:xmm0 to 128 bits in xmm0.
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        movdqu  [eax],xmm0
        ret

;-----------------------------------------------------------------------
; _gcm_ghash_clmul(arg1, arg2, arg3, arg4)
;   arg1 [esp+4]  -> eax : 16-byte value, loaded at entry and stored at
;                          exit (byte-reversed both times).
;   arg2 [esp+8]  -> edx : table; offsets 0, 16 and 32 are read.
;   arg3 [esp+12] -> esi : input pointer.
;   arg4 [esp+16] -> ebx : byte count.
;   NOTE(review): the length bookkeeping (sub 16 / sub 32) only behaves
;   for a non-zero multiple of 16 - presumably guaranteed by callers;
;   confirm.
;   Two blocks are processed per iteration of L$010mod_loop; a single
;   leftover block is handled at L$008odd_tail.
;   Uses xmm0-xmm7.
;-----------------------------------------------------------------------
global	_gcm_ghash_clmul
align	16
_gcm_ghash_clmul:
L$_gcm_ghash_clmul_begin:
        push    ebp
        push    ebx
        push    esi
        push    edi
        mov     eax,DWORD [20+esp]
        mov     edx,DWORD [24+esp]
        mov     esi,DWORD [28+esp]
        mov     ebx,DWORD [32+esp]
        call    L$007pic
L$007pic:
        pop     ecx
        lea     ecx,[(L$bswap-L$007pic)+ecx]    ; ecx = &L$bswap
        movdqu  xmm0,[eax]
        movdqa  xmm5,[ecx]                      ; byte-reversal mask
        movdqu  xmm2,[edx]
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        sub     ebx,16
        jz      NEAR L$008odd_tail              ; exactly one block
        ; Prime the pipeline with the first two blocks.
        movdqu  xmm3,[esi]
        movdqu  xmm6,[16+esi]
        db      102,15,56,0,221                 ; pshufb xmm3,xmm5
        db      102,15,56,0,245                 ; pshufb xmm6,xmm5
        movdqu  xmm5,[32+edx]
        pxor    xmm0,xmm3
        pshufd  xmm3,xmm6,78
        movdqa  xmm7,xmm6
        pxor    xmm3,xmm6
        lea     esi,[32+esi]
        db      102,15,58,68,242,0              ; pclmulqdq xmm6,xmm2,0x00
        db      102,15,58,68,250,17             ; pclmulqdq xmm7,xmm2,0x11
        db      102,15,58,68,221,0              ; pclmulqdq xmm3,xmm5,0x00
        movups  xmm2,[16+edx]
        nop
        sub     ebx,32
        jbe     NEAR L$009even_tail
        jmp     NEAR L$010mod_loop              ; hop over the alignment padding
align	32
L$010mod_loop:
        pshufd  xmm4,xmm0,78
        movdqa  xmm1,xmm0
        pxor    xmm4,xmm0
        nop
        db      102,15,58,68,194,0              ; pclmulqdq xmm0,xmm2,0x00
        db      102,15,58,68,202,17             ; pclmulqdq xmm1,xmm2,0x11
        db      102,15,58,68,229,16             ; pclmulqdq xmm4,xmm5,0x10
        movups  xmm2,[edx]
        xorps   xmm0,xmm6
        movdqa  xmm5,[ecx]
        xorps   xmm1,xmm7
        movdqu  xmm7,[esi]
        pxor    xmm3,xmm0
        movdqu  xmm6,[16+esi]
        pxor    xmm3,xmm1
        db      102,15,56,0,253                 ; pshufb xmm7,xmm5
        pxor    xmm4,xmm3
        movdqa  xmm3,xmm4
        psrldq  xmm4,8
        pslldq  xmm3,8
        pxor    xmm1,xmm4
        pxor    xmm0,xmm3
        db      102,15,56,0,245                 ; pshufb xmm6,xmm5
        pxor    xmm1,xmm7
        movdqa  xmm7,xmm6
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        db      102,15,58,68,242,0              ; pclmulqdq xmm6,xmm2,0x00
        movups  xmm5,[32+edx]
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        pshufd  xmm3,xmm7,78
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm3,xmm7
        pxor    xmm1,xmm4
        db      102,15,58,68,250,17             ; pclmulqdq xmm7,xmm2,0x11
        movups  xmm2,[16+edx]
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        db      102,15,58,68,221,0              ; pclmulqdq xmm3,xmm5,0x00
        lea     esi,[32+esi]
        sub     ebx,32
        ja      NEAR L$010mod_loop
L$009even_tail:
        ; Finish the pair of blocks that is still in flight.
        pshufd  xmm4,xmm0,78
        movdqa  xmm1,xmm0
        pxor    xmm4,xmm0
        db      102,15,58,68,194,0              ; pclmulqdq xmm0,xmm2,0x00
        db      102,15,58,68,202,17             ; pclmulqdq xmm1,xmm2,0x11
        db      102,15,58,68,229,16             ; pclmulqdq xmm4,xmm5,0x10
        movdqa  xmm5,[ecx]
        xorps   xmm0,xmm6
        xorps   xmm1,xmm7
        pxor    xmm3,xmm0
        pxor    xmm3,xmm1
        pxor    xmm4,xmm3
        movdqa  xmm3,xmm4
        psrldq  xmm4,8
        pslldq  xmm3,8
        pxor    xmm1,xmm4
        pxor    xmm0,xmm3
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
        test    ebx,ebx
        jnz     NEAR L$011done                  ; non-zero: no trailing block
        movups  xmm2,[edx]
L$008odd_tail:
        ; One remaining block: XOR it in, multiply by xmm2, reduce.
        movdqu  xmm3,[esi]
        db      102,15,56,0,221                 ; pshufb xmm3,xmm5
        pxor    xmm0,xmm3
        movdqa  xmm1,xmm0
        pshufd  xmm3,xmm0,78
        pshufd  xmm4,xmm2,78
        pxor    xmm3,xmm0
        pxor    xmm4,xmm2
        db      102,15,58,68,194,0              ; pclmulqdq xmm0,xmm2,0x00
        db      102,15,58,68,202,17             ; pclmulqdq xmm1,xmm2,0x11
        db      102,15,58,68,220,0              ; pclmulqdq xmm3,xmm4,0x00
        xorps   xmm3,xmm0
        xorps   xmm3,xmm1
        movdqa  xmm4,xmm3
        psrldq  xmm3,8
        pslldq  xmm4,8
        pxor    xmm1,xmm3
        pxor    xmm0,xmm4
        movdqa  xmm4,xmm0
        movdqa  xmm3,xmm0
        psllq   xmm0,5
        pxor    xmm3,xmm0
        psllq   xmm0,1
        pxor    xmm0,xmm3
        psllq   xmm0,57
        movdqa  xmm3,xmm0
        pslldq  xmm0,8
        psrldq  xmm3,8
        pxor    xmm0,xmm4
        pxor    xmm1,xmm3
        movdqa  xmm4,xmm0
        psrlq   xmm0,1
        pxor    xmm1,xmm4
        pxor    xmm4,xmm0
        psrlq   xmm0,5
        pxor    xmm0,xmm4
        psrlq   xmm0,1
        pxor    xmm0,xmm1
L$011done:
        db      102,15,56,0,197                 ; pshufb xmm0,xmm5
        movdqu  [eax],xmm0
        pop     edi
        pop     esi
        pop     ebx
        pop     ebp
        ret

;-----------------------------------------------------------------------
; Constant data.  It stays in the code section because the routines
; above locate it through label differences relative to their pic
; points.
;-----------------------------------------------------------------------
align	64
L$bswap:
        ; +0 : pshufb mask that reverses the 16 bytes of a register.
        db      15,14,13,12,11,10,9,8,7,6,5,4,3,2,1,0
        ; +16: constant XORed in by _gcm_init_clmul (`pand xmm5,[16+ecx]`).
        db      1,0,0,0,0,0,0,0,0,0,0,0,0,0,0,194
align	64
L$rem_8bit:
        ; 256 words, indexed by a byte value in _gcm_ghash_4bit_mmx.
        dw      0,450,900,582,1800,1738,1164,1358
        dw      3600,4050,3476,3158,2328,2266,2716,2910
        dw      7200,7650,8100,7782,6952,6890,6316,6510
        dw      4656,5106,4532,4214,5432,5370,5820,6014
        dw      14400,14722,15300,14854,16200,16010,15564,15630
        dw      13904,14226,13780,13334,12632,12442,13020,13086
        dw      9312,9634,10212,9766,9064,8874,8428,8494
        dw      10864,11186,10740,10294,11640,11450,12028,12094
        dw      28800,28994,29444,29382,30600,30282,29708,30158
        dw      32400,32594,32020,31958,31128,30810,31260,31710
        dw      27808,28002,28452,28390,27560,27242,26668,27118
        dw      25264,25458,24884,24822,26040,25722,26172,26622
        dw      18624,18690,19268,19078,20424,19978,19532,19854
        dw      18128,18194,17748,17558,16856,16410,16988,17310
        dw      21728,21794,22372,22182,21480,21034,20588,20910
        dw      23280,23346,22900,22710,24056,23610,24188,24510
        dw      57600,57538,57988,58182,58888,59338,58764,58446
        dw      61200,61138,60564,60758,59416,59866,60316,59998
        dw      64800,64738,65188,65382,64040,64490,63916,63598
        dw      62256,62194,61620,61814,62520,62970,63420,63102
        dw      55616,55426,56004,56070,56904,57226,56780,56334
        dw      55120,54930,54484,54550,53336,53658,54236,53790
        dw      50528,50338,50916,50982,49768,50090,49644,49198
        dw      52080,51890,51444,51510,52344,52666,53244,52798
        dw      37248,36930,37380,37830,38536,38730,38156,38094
        dw      40848,40530,39956,40406,39064,39258,39708,39646
        dw      36256,35938,36388,36838,35496,35690,35116,35054
        dw      33712,33394,32820,33270,33976,34170,34620,34558
        dw      43456,43010,43588,43910,44744,44810,44364,44174
        dw      42960,42514,42068,42390,41176,41242,41820,41630
        dw      46560,46114,46692,47014,45800,45866,45420,45230
        dw      48112,47666,47220,47542,48376,48442,49020,48830
align	64
L$rem_4bit:
        ; 16 qwords (stored as dword pairs), indexed by a nibble in
        ; _gcm_gmult_4bit_mmx.
        dd      0,0,0,471859200,0,943718400,0,610271232
        dd      0,1887436800,0,1822425088,0,1220542464,0,1423966208
        dd      0,3774873600,0,4246732800,0,3644850176,0,3311403008
        dd      0,2441084928,0,2376073216,0,2847932416,0,3051356160
        ; ASCII ident string: "GHASH for x86, CRYPTOGAMS by <appro@openssl.org>",0
        db      71,72,65,83,72,32,102,111,114,32,120,56,54,44,32,67
        db      82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112
        db      112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62
        db      0