1/* 2Copyright (c) 2010, 2011, 2012, 2013 Intel Corporation 3All rights reserved. 4 5Redistribution and use in source and binary forms, with or without 6modification, are permitted provided that the following conditions are met: 7 8 * Redistributions of source code must retain the above copyright notice, 9 * this list of conditions and the following disclaimer. 10 11 * Redistributions in binary form must reproduce the above copyright notice, 12 * this list of conditions and the following disclaimer in the documentation 13 * and/or other materials provided with the distribution. 14 15 * Neither the name of Intel Corporation nor the names of its contributors 16 * may be used to endorse or promote products derived from this software 17 * without specific prior written permission. 18 19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND 20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED 21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE 22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR 23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES 24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON 26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS 28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/

/* ---------------------------------------------------------------------
   Preprocessor preamble.

   Every helper below that is wrapped in a !defined() guard is only a
   fallback: a file that includes this one may supply its own definition
   first and it will be left untouched.
   --------------------------------------------------------------------- */

/* L(foo) expands to .Lfoo, i.e. a label carrying the .L prefix.  */
#if !defined(L)
# define L(label)	.L##label
#endif

/* One-to-one wrappers around the assembler .cfi_* directives.  */
#if !defined(cfi_startproc)
# define cfi_startproc	.cfi_startproc
#endif

#if !defined(cfi_endproc)
# define cfi_endproc	.cfi_endproc
#endif

#if !defined(cfi_rel_offset)
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#if !defined(cfi_restore)
# define cfi_restore(reg)	.cfi_restore reg
#endif

#if !defined(cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#if !defined(cfi_remember_state)
# define cfi_remember_state	.cfi_remember_state
#endif

#if !defined(cfi_restore_state)
# define cfi_restore_state	.cfi_restore_state
#endif

/* ENTRY(name): mark NAME as a global function symbol, align it with
   .p2align 4, emit the label and open its CFI region.  */
#if !defined(ENTRY)
# define ENTRY(name)		\
	.type name, @function;	\
	.globl name;		\
	.p2align 4;		\
name:				\
	cfi_startproc
#endif

/* END(name): close the CFI region and record the symbol size as the
   distance from NAME to the current location.  */
#if !defined(END)
# define END(name)		\
	cfi_endproc;		\
	.size name, .-name
#endif

/* Symbol name given to the routine; memcmp unless overridden.  */
#if !defined(MEMCMP)
# define MEMCMP	memcmp
#endif

/* Displacements, applied to %esp, of the three stack arguments.  The
   routine reads them before it pushes anything.  */
#define PARMS	4
#define BLK1	PARMS
#define BLK2	BLK1+4
#define LEN	BLK2+4

/* Unwind bookkeeping for a single 4-byte push or pop of REG: move the
   CFA by one slot and record (or forget) where REG was saved.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* pushl / popl paired with the matching unwind annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* RETURN_END pops %edi, %esi and %ebx (in that order) and returns.
   RETURN does the same and then restores and re-saves the remembered
   CFI state, so the code placed after it is annotated with that state
   again.  */
#define RETURN_END	POP (%edi); POP (%esi); POP (%ebx); ret
#define RETURN	RETURN_END; cfi_restore_state; cfi_remember_state

/* Warning!
	wmemcmp has to use SIGNED comparison for elements.
	memcmp has to use UNSIGNED comparison for elements.
103*/ 104 105 .text 106ENTRY (MEMCMP) 107 movl LEN(%esp), %ecx 108 109#ifdef USE_WCHAR 110 shl $2, %ecx 111 jz L(zero) 112#elif defined USE_UTF16 113 shl $1, %ecx 114 jz L(zero) 115#endif 116 117 movl BLK1(%esp), %eax 118 cmp $48, %ecx 119 movl BLK2(%esp), %edx 120 jae L(48bytesormore) 121 122#if !defined(USE_WCHAR) && !defined(USE_UTF16) 123 cmp $1, %ecx 124 jbe L(less1bytes) 125#endif 126 127 PUSH (%ebx) 128 add %ecx, %edx 129 add %ecx, %eax 130 jmp L(less48bytes) 131 132 CFI_POP (%ebx) 133 134#if !defined(USE_WCHAR) && !defined(USE_UTF16) 135 .p2align 4 136L(less1bytes): 137 jb L(zero) 138 movb (%eax), %cl 139 cmp (%edx), %cl 140 je L(zero) 141 mov $1, %eax 142 ja L(1bytesend) 143 neg %eax 144L(1bytesend): 145 ret 146#endif 147 148 .p2align 4 149L(zero): 150 xor %eax, %eax 151 ret 152 153 .p2align 4 154L(48bytesormore): 155 PUSH (%ebx) 156 PUSH (%esi) 157 PUSH (%edi) 158 cfi_remember_state 159 movdqu (%eax), %xmm3 160 movdqu (%edx), %xmm0 161 movl %eax, %edi 162 movl %edx, %esi 163 pcmpeqb %xmm0, %xmm3 164 pmovmskb %xmm3, %edx 165 lea 16(%edi), %edi 166 167 sub $0xffff, %edx 168 lea 16(%esi), %esi 169 jnz L(less16bytes) 170 mov %edi, %edx 171 and $0xf, %edx 172 xor %edx, %edi 173 sub %edx, %esi 174 add %edx, %ecx 175 mov %esi, %edx 176 and $0xf, %edx 177 jz L(shr_0) 178 xor %edx, %esi 179 180#if !defined(USE_WCHAR) && !defined(USE_UTF16) 181 cmp $8, %edx 182 jae L(next_unaligned_table) 183 cmp $0, %edx 184 je L(shr_0) 185 cmp $1, %edx 186 je L(shr_1) 187 cmp $2, %edx 188 je L(shr_2) 189 cmp $3, %edx 190 je L(shr_3) 191 cmp $4, %edx 192 je L(shr_4) 193 cmp $5, %edx 194 je L(shr_5) 195 cmp $6, %edx 196 je L(shr_6) 197 jmp L(shr_7) 198 199 .p2align 2 200L(next_unaligned_table): 201 cmp $8, %edx 202 je L(shr_8) 203 cmp $9, %edx 204 je L(shr_9) 205 cmp $10, %edx 206 je L(shr_10) 207 cmp $11, %edx 208 je L(shr_11) 209 cmp $12, %edx 210 je L(shr_12) 211 cmp $13, %edx 212 je L(shr_13) 213 cmp $14, %edx 214 je L(shr_14) 215 jmp L(shr_15) 216#elif defined(USE_WCHAR) 217 
cmp $0, %edx 218 je L(shr_0) 219 cmp $4, %edx 220 je L(shr_4) 221 cmp $8, %edx 222 je L(shr_8) 223 jmp L(shr_12) 224#elif defined(USE_UTF16) 225 cmp $0, %edx 226 je L(shr_0) 227 cmp $2, %edx 228 je L(shr_2) 229 cmp $4, %edx 230 je L(shr_4) 231 cmp $6, %edx 232 je L(shr_6) 233 cmp $8, %edx 234 je L(shr_8) 235 cmp $10, %edx 236 je L(shr_10) 237 cmp $12, %edx 238 je L(shr_12) 239 jmp L(shr_14) 240#endif 241 242 .p2align 4 243L(shr_0): 244 cmp $80, %ecx 245 jae L(shr_0_gobble) 246 lea -48(%ecx), %ecx 247 xor %eax, %eax 248 movaps (%esi), %xmm1 249 pcmpeqb (%edi), %xmm1 250 movaps 16(%esi), %xmm2 251 pcmpeqb 16(%edi), %xmm2 252 pand %xmm1, %xmm2 253 pmovmskb %xmm2, %edx 254 add $32, %edi 255 add $32, %esi 256 sub $0xffff, %edx 257 jnz L(exit) 258 259 lea (%ecx, %edi,1), %eax 260 lea (%ecx, %esi,1), %edx 261 POP (%edi) 262 POP (%esi) 263 jmp L(less48bytes) 264 265 cfi_restore_state 266 cfi_remember_state 267 .p2align 4 268L(shr_0_gobble): 269 lea -48(%ecx), %ecx 270 movdqa (%esi), %xmm0 271 xor %eax, %eax 272 pcmpeqb (%edi), %xmm0 273 sub $32, %ecx 274 movdqa 16(%esi), %xmm2 275 pcmpeqb 16(%edi), %xmm2 276L(shr_0_gobble_loop): 277 pand %xmm0, %xmm2 278 sub $32, %ecx 279 pmovmskb %xmm2, %edx 280 movdqa %xmm0, %xmm1 281 movdqa 32(%esi), %xmm0 282 movdqa 48(%esi), %xmm2 283 sbb $0xffff, %edx 284 pcmpeqb 32(%edi), %xmm0 285 pcmpeqb 48(%edi), %xmm2 286 lea 32(%edi), %edi 287 lea 32(%esi), %esi 288 jz L(shr_0_gobble_loop) 289 290 pand %xmm0, %xmm2 291 cmp $0, %ecx 292 jge L(shr_0_gobble_loop_next) 293 inc %edx 294 add $32, %ecx 295L(shr_0_gobble_loop_next): 296 test %edx, %edx 297 jnz L(exit) 298 299 pmovmskb %xmm2, %edx 300 movdqa %xmm0, %xmm1 301 lea 32(%edi), %edi 302 lea 32(%esi), %esi 303 sub $0xffff, %edx 304 jnz L(exit) 305 lea (%ecx, %edi,1), %eax 306 lea (%ecx, %esi,1), %edx 307 POP (%edi) 308 POP (%esi) 309 jmp L(less48bytes) 310 311#if !defined(USE_WCHAR) && !defined(USE_UTF16) 312 cfi_restore_state 313 cfi_remember_state 314 .p2align 4 315L(shr_1): 316 cmp $80, 
%ecx 317 lea -48(%ecx), %ecx 318 mov %edx, %eax 319 jae L(shr_1_gobble) 320 321 movdqa 16(%esi), %xmm1 322 movdqa %xmm1, %xmm2 323 palignr $1,(%esi), %xmm1 324 pcmpeqb (%edi), %xmm1 325 326 movdqa 32(%esi), %xmm3 327 palignr $1,%xmm2, %xmm3 328 pcmpeqb 16(%edi), %xmm3 329 330 pand %xmm1, %xmm3 331 pmovmskb %xmm3, %edx 332 lea 32(%edi), %edi 333 lea 32(%esi), %esi 334 sub $0xffff, %edx 335 jnz L(exit) 336 lea (%ecx, %edi,1), %eax 337 lea 1(%ecx, %esi,1), %edx 338 POP (%edi) 339 POP (%esi) 340 jmp L(less48bytes) 341 342 cfi_restore_state 343 cfi_remember_state 344 .p2align 4 345L(shr_1_gobble): 346 sub $32, %ecx 347 movdqa 16(%esi), %xmm0 348 palignr $1,(%esi), %xmm0 349 pcmpeqb (%edi), %xmm0 350 351 movdqa 32(%esi), %xmm3 352 palignr $1,16(%esi), %xmm3 353 pcmpeqb 16(%edi), %xmm3 354 355L(shr_1_gobble_loop): 356 pand %xmm0, %xmm3 357 sub $32, %ecx 358 pmovmskb %xmm3, %edx 359 movdqa %xmm0, %xmm1 360 361 movdqa 64(%esi), %xmm3 362 palignr $1,48(%esi), %xmm3 363 sbb $0xffff, %edx 364 movdqa 48(%esi), %xmm0 365 palignr $1,32(%esi), %xmm0 366 pcmpeqb 32(%edi), %xmm0 367 lea 32(%esi), %esi 368 pcmpeqb 48(%edi), %xmm3 369 370 lea 32(%edi), %edi 371 jz L(shr_1_gobble_loop) 372 pand %xmm0, %xmm3 373 374 cmp $0, %ecx 375 jge L(shr_1_gobble_next) 376 inc %edx 377 add $32, %ecx 378L(shr_1_gobble_next): 379 test %edx, %edx 380 jnz L(exit) 381 382 pmovmskb %xmm3, %edx 383 movdqa %xmm0, %xmm1 384 lea 32(%edi), %edi 385 lea 32(%esi), %esi 386 sub $0xffff, %edx 387 jnz L(exit) 388 389 lea (%ecx, %edi,1), %eax 390 lea 1(%ecx, %esi,1), %edx 391 POP (%edi) 392 POP (%esi) 393 jmp L(less48bytes) 394#endif 395 396 397#if !defined(USE_WCHAR) 398 cfi_restore_state 399 cfi_remember_state 400 .p2align 4 401L(shr_2): 402 cmp $80, %ecx 403 lea -48(%ecx), %ecx 404 mov %edx, %eax 405 jae L(shr_2_gobble) 406 407 movdqa 16(%esi), %xmm1 408 movdqa %xmm1, %xmm2 409 palignr $2,(%esi), %xmm1 410 pcmpeqb (%edi), %xmm1 411 412 movdqa 32(%esi), %xmm3 413 palignr $2,%xmm2, %xmm3 414 pcmpeqb 16(%edi), 
%xmm3 415 416 pand %xmm1, %xmm3 417 pmovmskb %xmm3, %edx 418 lea 32(%edi), %edi 419 lea 32(%esi), %esi 420 sub $0xffff, %edx 421 jnz L(exit) 422 lea (%ecx, %edi,1), %eax 423 lea 2(%ecx, %esi,1), %edx 424 POP (%edi) 425 POP (%esi) 426 jmp L(less48bytes) 427 428 cfi_restore_state 429 cfi_remember_state 430 .p2align 4 431L(shr_2_gobble): 432 sub $32, %ecx 433 movdqa 16(%esi), %xmm0 434 palignr $2,(%esi), %xmm0 435 pcmpeqb (%edi), %xmm0 436 437 movdqa 32(%esi), %xmm3 438 palignr $2,16(%esi), %xmm3 439 pcmpeqb 16(%edi), %xmm3 440 441L(shr_2_gobble_loop): 442 pand %xmm0, %xmm3 443 sub $32, %ecx 444 pmovmskb %xmm3, %edx 445 movdqa %xmm0, %xmm1 446 447 movdqa 64(%esi), %xmm3 448 palignr $2,48(%esi), %xmm3 449 sbb $0xffff, %edx 450 movdqa 48(%esi), %xmm0 451 palignr $2,32(%esi), %xmm0 452 pcmpeqb 32(%edi), %xmm0 453 lea 32(%esi), %esi 454 pcmpeqb 48(%edi), %xmm3 455 456 lea 32(%edi), %edi 457 jz L(shr_2_gobble_loop) 458 pand %xmm0, %xmm3 459 460 cmp $0, %ecx 461 jge L(shr_2_gobble_next) 462 inc %edx 463 add $32, %ecx 464L(shr_2_gobble_next): 465 test %edx, %edx 466 jnz L(exit) 467 468 pmovmskb %xmm3, %edx 469 movdqa %xmm0, %xmm1 470 lea 32(%edi), %edi 471 lea 32(%esi), %esi 472 sub $0xffff, %edx 473 jnz L(exit) 474 475 lea (%ecx, %edi,1), %eax 476 lea 2(%ecx, %esi,1), %edx 477 POP (%edi) 478 POP (%esi) 479 jmp L(less48bytes) 480#endif 481 482#if !defined(USE_WCHAR) && !defined(USE_UTF16) 483 cfi_restore_state 484 cfi_remember_state 485 .p2align 4 486L(shr_3): 487 cmp $80, %ecx 488 lea -48(%ecx), %ecx 489 mov %edx, %eax 490 jae L(shr_3_gobble) 491 492 movdqa 16(%esi), %xmm1 493 movdqa %xmm1, %xmm2 494 palignr $3,(%esi), %xmm1 495 pcmpeqb (%edi), %xmm1 496 497 movdqa 32(%esi), %xmm3 498 palignr $3,%xmm2, %xmm3 499 pcmpeqb 16(%edi), %xmm3 500 501 pand %xmm1, %xmm3 502 pmovmskb %xmm3, %edx 503 lea 32(%edi), %edi 504 lea 32(%esi), %esi 505 sub $0xffff, %edx 506 jnz L(exit) 507 lea (%ecx, %edi,1), %eax 508 lea 3(%ecx, %esi,1), %edx 509 POP (%edi) 510 POP (%esi) 511 jmp 
L(less48bytes) 512 513 cfi_restore_state 514 cfi_remember_state 515 .p2align 4 516L(shr_3_gobble): 517 sub $32, %ecx 518 movdqa 16(%esi), %xmm0 519 palignr $3,(%esi), %xmm0 520 pcmpeqb (%edi), %xmm0 521 522 movdqa 32(%esi), %xmm3 523 palignr $3,16(%esi), %xmm3 524 pcmpeqb 16(%edi), %xmm3 525 526L(shr_3_gobble_loop): 527 pand %xmm0, %xmm3 528 sub $32, %ecx 529 pmovmskb %xmm3, %edx 530 movdqa %xmm0, %xmm1 531 532 movdqa 64(%esi), %xmm3 533 palignr $3,48(%esi), %xmm3 534 sbb $0xffff, %edx 535 movdqa 48(%esi), %xmm0 536 palignr $3,32(%esi), %xmm0 537 pcmpeqb 32(%edi), %xmm0 538 lea 32(%esi), %esi 539 pcmpeqb 48(%edi), %xmm3 540 541 lea 32(%edi), %edi 542 jz L(shr_3_gobble_loop) 543 pand %xmm0, %xmm3 544 545 cmp $0, %ecx 546 jge L(shr_3_gobble_next) 547 inc %edx 548 add $32, %ecx 549L(shr_3_gobble_next): 550 test %edx, %edx 551 jnz L(exit) 552 553 pmovmskb %xmm3, %edx 554 movdqa %xmm0, %xmm1 555 lea 32(%edi), %edi 556 lea 32(%esi), %esi 557 sub $0xffff, %edx 558 jnz L(exit) 559 560 lea (%ecx, %edi,1), %eax 561 lea 3(%ecx, %esi,1), %edx 562 POP (%edi) 563 POP (%esi) 564 jmp L(less48bytes) 565#endif 566 567 cfi_restore_state 568 cfi_remember_state 569 .p2align 4 570L(shr_4): 571 cmp $80, %ecx 572 lea -48(%ecx), %ecx 573 mov %edx, %eax 574 jae L(shr_4_gobble) 575 576 movdqa 16(%esi), %xmm1 577 movdqa %xmm1, %xmm2 578 palignr $4,(%esi), %xmm1 579 pcmpeqb (%edi), %xmm1 580 581 movdqa 32(%esi), %xmm3 582 palignr $4,%xmm2, %xmm3 583 pcmpeqb 16(%edi), %xmm3 584 585 pand %xmm1, %xmm3 586 pmovmskb %xmm3, %edx 587 lea 32(%edi), %edi 588 lea 32(%esi), %esi 589 sub $0xffff, %edx 590 jnz L(exit) 591 lea (%ecx, %edi,1), %eax 592 lea 4(%ecx, %esi,1), %edx 593 POP (%edi) 594 POP (%esi) 595 jmp L(less48bytes) 596 597 cfi_restore_state 598 cfi_remember_state 599 .p2align 4 600L(shr_4_gobble): 601 sub $32, %ecx 602 movdqa 16(%esi), %xmm0 603 palignr $4,(%esi), %xmm0 604 pcmpeqb (%edi), %xmm0 605 606 movdqa 32(%esi), %xmm3 607 palignr $4,16(%esi), %xmm3 608 pcmpeqb 16(%edi), %xmm3 609 
610L(shr_4_gobble_loop): 611 pand %xmm0, %xmm3 612 sub $32, %ecx 613 pmovmskb %xmm3, %edx 614 movdqa %xmm0, %xmm1 615 616 movdqa 64(%esi), %xmm3 617 palignr $4,48(%esi), %xmm3 618 sbb $0xffff, %edx 619 movdqa 48(%esi), %xmm0 620 palignr $4,32(%esi), %xmm0 621 pcmpeqb 32(%edi), %xmm0 622 lea 32(%esi), %esi 623 pcmpeqb 48(%edi), %xmm3 624 625 lea 32(%edi), %edi 626 jz L(shr_4_gobble_loop) 627 pand %xmm0, %xmm3 628 629 cmp $0, %ecx 630 jge L(shr_4_gobble_next) 631 inc %edx 632 add $32, %ecx 633L(shr_4_gobble_next): 634 test %edx, %edx 635 jnz L(exit) 636 637 pmovmskb %xmm3, %edx 638 movdqa %xmm0, %xmm1 639 lea 32(%edi), %edi 640 lea 32(%esi), %esi 641 sub $0xffff, %edx 642 jnz L(exit) 643 644 lea (%ecx, %edi,1), %eax 645 lea 4(%ecx, %esi,1), %edx 646 POP (%edi) 647 POP (%esi) 648 jmp L(less48bytes) 649 650#if !defined(USE_WCHAR) && !defined(USE_UTF16) 651 cfi_restore_state 652 cfi_remember_state 653 .p2align 4 654L(shr_5): 655 cmp $80, %ecx 656 lea -48(%ecx), %ecx 657 mov %edx, %eax 658 jae L(shr_5_gobble) 659 660 movdqa 16(%esi), %xmm1 661 movdqa %xmm1, %xmm2 662 palignr $5,(%esi), %xmm1 663 pcmpeqb (%edi), %xmm1 664 665 movdqa 32(%esi), %xmm3 666 palignr $5,%xmm2, %xmm3 667 pcmpeqb 16(%edi), %xmm3 668 669 pand %xmm1, %xmm3 670 pmovmskb %xmm3, %edx 671 lea 32(%edi), %edi 672 lea 32(%esi), %esi 673 sub $0xffff, %edx 674 jnz L(exit) 675 lea (%ecx, %edi,1), %eax 676 lea 5(%ecx, %esi,1), %edx 677 POP (%edi) 678 POP (%esi) 679 jmp L(less48bytes) 680 681 cfi_restore_state 682 cfi_remember_state 683 .p2align 4 684L(shr_5_gobble): 685 sub $32, %ecx 686 movdqa 16(%esi), %xmm0 687 palignr $5,(%esi), %xmm0 688 pcmpeqb (%edi), %xmm0 689 690 movdqa 32(%esi), %xmm3 691 palignr $5,16(%esi), %xmm3 692 pcmpeqb 16(%edi), %xmm3 693 694L(shr_5_gobble_loop): 695 pand %xmm0, %xmm3 696 sub $32, %ecx 697 pmovmskb %xmm3, %edx 698 movdqa %xmm0, %xmm1 699 700 movdqa 64(%esi), %xmm3 701 palignr $5,48(%esi), %xmm3 702 sbb $0xffff, %edx 703 movdqa 48(%esi), %xmm0 704 palignr $5,32(%esi), %xmm0 
705 pcmpeqb 32(%edi), %xmm0 706 lea 32(%esi), %esi 707 pcmpeqb 48(%edi), %xmm3 708 709 lea 32(%edi), %edi 710 jz L(shr_5_gobble_loop) 711 pand %xmm0, %xmm3 712 713 cmp $0, %ecx 714 jge L(shr_5_gobble_next) 715 inc %edx 716 add $32, %ecx 717L(shr_5_gobble_next): 718 test %edx, %edx 719 jnz L(exit) 720 721 pmovmskb %xmm3, %edx 722 movdqa %xmm0, %xmm1 723 lea 32(%edi), %edi 724 lea 32(%esi), %esi 725 sub $0xffff, %edx 726 jnz L(exit) 727 728 lea (%ecx, %edi,1), %eax 729 lea 5(%ecx, %esi,1), %edx 730 POP (%edi) 731 POP (%esi) 732 jmp L(less48bytes) 733#endif 734 735#if !defined(USE_WCHAR) 736 cfi_restore_state 737 cfi_remember_state 738 .p2align 4 739L(shr_6): 740 cmp $80, %ecx 741 lea -48(%ecx), %ecx 742 mov %edx, %eax 743 jae L(shr_6_gobble) 744 745 movdqa 16(%esi), %xmm1 746 movdqa %xmm1, %xmm2 747 palignr $6,(%esi), %xmm1 748 pcmpeqb (%edi), %xmm1 749 750 movdqa 32(%esi), %xmm3 751 palignr $6,%xmm2, %xmm3 752 pcmpeqb 16(%edi), %xmm3 753 754 pand %xmm1, %xmm3 755 pmovmskb %xmm3, %edx 756 lea 32(%edi), %edi 757 lea 32(%esi), %esi 758 sub $0xffff, %edx 759 jnz L(exit) 760 lea (%ecx, %edi,1), %eax 761 lea 6(%ecx, %esi,1), %edx 762 POP (%edi) 763 POP (%esi) 764 jmp L(less48bytes) 765 766 cfi_restore_state 767 cfi_remember_state 768 .p2align 4 769L(shr_6_gobble): 770 sub $32, %ecx 771 movdqa 16(%esi), %xmm0 772 palignr $6,(%esi), %xmm0 773 pcmpeqb (%edi), %xmm0 774 775 movdqa 32(%esi), %xmm3 776 palignr $6,16(%esi), %xmm3 777 pcmpeqb 16(%edi), %xmm3 778 779L(shr_6_gobble_loop): 780 pand %xmm0, %xmm3 781 sub $32, %ecx 782 pmovmskb %xmm3, %edx 783 movdqa %xmm0, %xmm1 784 785 movdqa 64(%esi), %xmm3 786 palignr $6,48(%esi), %xmm3 787 sbb $0xffff, %edx 788 movdqa 48(%esi), %xmm0 789 palignr $6,32(%esi), %xmm0 790 pcmpeqb 32(%edi), %xmm0 791 lea 32(%esi), %esi 792 pcmpeqb 48(%edi), %xmm3 793 794 lea 32(%edi), %edi 795 jz L(shr_6_gobble_loop) 796 pand %xmm0, %xmm3 797 798 cmp $0, %ecx 799 jge L(shr_6_gobble_next) 800 inc %edx 801 add $32, %ecx 802L(shr_6_gobble_next): 803 test 
%edx, %edx 804 jnz L(exit) 805 806 pmovmskb %xmm3, %edx 807 movdqa %xmm0, %xmm1 808 lea 32(%edi), %edi 809 lea 32(%esi), %esi 810 sub $0xffff, %edx 811 jnz L(exit) 812 813 lea (%ecx, %edi,1), %eax 814 lea 6(%ecx, %esi,1), %edx 815 POP (%edi) 816 POP (%esi) 817 jmp L(less48bytes) 818#endif 819 820#if !defined(USE_WCHAR) && !defined(USE_UTF16) 821 cfi_restore_state 822 cfi_remember_state 823 .p2align 4 824L(shr_7): 825 cmp $80, %ecx 826 lea -48(%ecx), %ecx 827 mov %edx, %eax 828 jae L(shr_7_gobble) 829 830 movdqa 16(%esi), %xmm1 831 movdqa %xmm1, %xmm2 832 palignr $7,(%esi), %xmm1 833 pcmpeqb (%edi), %xmm1 834 835 movdqa 32(%esi), %xmm3 836 palignr $7,%xmm2, %xmm3 837 pcmpeqb 16(%edi), %xmm3 838 839 pand %xmm1, %xmm3 840 pmovmskb %xmm3, %edx 841 lea 32(%edi), %edi 842 lea 32(%esi), %esi 843 sub $0xffff, %edx 844 jnz L(exit) 845 lea (%ecx, %edi,1), %eax 846 lea 7(%ecx, %esi,1), %edx 847 POP (%edi) 848 POP (%esi) 849 jmp L(less48bytes) 850 851 cfi_restore_state 852 cfi_remember_state 853 .p2align 4 854L(shr_7_gobble): 855 sub $32, %ecx 856 movdqa 16(%esi), %xmm0 857 palignr $7,(%esi), %xmm0 858 pcmpeqb (%edi), %xmm0 859 860 movdqa 32(%esi), %xmm3 861 palignr $7,16(%esi), %xmm3 862 pcmpeqb 16(%edi), %xmm3 863 864L(shr_7_gobble_loop): 865 pand %xmm0, %xmm3 866 sub $32, %ecx 867 pmovmskb %xmm3, %edx 868 movdqa %xmm0, %xmm1 869 870 movdqa 64(%esi), %xmm3 871 palignr $7,48(%esi), %xmm3 872 sbb $0xffff, %edx 873 movdqa 48(%esi), %xmm0 874 palignr $7,32(%esi), %xmm0 875 pcmpeqb 32(%edi), %xmm0 876 lea 32(%esi), %esi 877 pcmpeqb 48(%edi), %xmm3 878 879 lea 32(%edi), %edi 880 jz L(shr_7_gobble_loop) 881 pand %xmm0, %xmm3 882 883 cmp $0, %ecx 884 jge L(shr_7_gobble_next) 885 inc %edx 886 add $32, %ecx 887L(shr_7_gobble_next): 888 test %edx, %edx 889 jnz L(exit) 890 891 pmovmskb %xmm3, %edx 892 movdqa %xmm0, %xmm1 893 lea 32(%edi), %edi 894 lea 32(%esi), %esi 895 sub $0xffff, %edx 896 jnz L(exit) 897 898 lea (%ecx, %edi,1), %eax 899 lea 7(%ecx, %esi,1), %edx 900 POP (%edi) 901 
POP (%esi) 902 jmp L(less48bytes) 903#endif 904 905 cfi_restore_state 906 cfi_remember_state 907 .p2align 4 908L(shr_8): 909 cmp $80, %ecx 910 lea -48(%ecx), %ecx 911 mov %edx, %eax 912 jae L(shr_8_gobble) 913 914 movdqa 16(%esi), %xmm1 915 movdqa %xmm1, %xmm2 916 palignr $8,(%esi), %xmm1 917 pcmpeqb (%edi), %xmm1 918 919 movdqa 32(%esi), %xmm3 920 palignr $8,%xmm2, %xmm3 921 pcmpeqb 16(%edi), %xmm3 922 923 pand %xmm1, %xmm3 924 pmovmskb %xmm3, %edx 925 lea 32(%edi), %edi 926 lea 32(%esi), %esi 927 sub $0xffff, %edx 928 jnz L(exit) 929 lea (%ecx, %edi,1), %eax 930 lea 8(%ecx, %esi,1), %edx 931 POP (%edi) 932 POP (%esi) 933 jmp L(less48bytes) 934 935 cfi_restore_state 936 cfi_remember_state 937 .p2align 4 938L(shr_8_gobble): 939 sub $32, %ecx 940 movdqa 16(%esi), %xmm0 941 palignr $8,(%esi), %xmm0 942 pcmpeqb (%edi), %xmm0 943 944 movdqa 32(%esi), %xmm3 945 palignr $8,16(%esi), %xmm3 946 pcmpeqb 16(%edi), %xmm3 947 948L(shr_8_gobble_loop): 949 pand %xmm0, %xmm3 950 sub $32, %ecx 951 pmovmskb %xmm3, %edx 952 movdqa %xmm0, %xmm1 953 954 movdqa 64(%esi), %xmm3 955 palignr $8,48(%esi), %xmm3 956 sbb $0xffff, %edx 957 movdqa 48(%esi), %xmm0 958 palignr $8,32(%esi), %xmm0 959 pcmpeqb 32(%edi), %xmm0 960 lea 32(%esi), %esi 961 pcmpeqb 48(%edi), %xmm3 962 963 lea 32(%edi), %edi 964 jz L(shr_8_gobble_loop) 965 pand %xmm0, %xmm3 966 967 cmp $0, %ecx 968 jge L(shr_8_gobble_next) 969 inc %edx 970 add $32, %ecx 971L(shr_8_gobble_next): 972 test %edx, %edx 973 jnz L(exit) 974 975 pmovmskb %xmm3, %edx 976 movdqa %xmm0, %xmm1 977 lea 32(%edi), %edi 978 lea 32(%esi), %esi 979 sub $0xffff, %edx 980 jnz L(exit) 981 982 lea (%ecx, %edi,1), %eax 983 lea 8(%ecx, %esi,1), %edx 984 POP (%edi) 985 POP (%esi) 986 jmp L(less48bytes) 987 988#if !defined(USE_WCHAR) && !defined(USE_UTF16) 989 cfi_restore_state 990 cfi_remember_state 991 .p2align 4 992L(shr_9): 993 cmp $80, %ecx 994 lea -48(%ecx), %ecx 995 mov %edx, %eax 996 jae L(shr_9_gobble) 997 998 movdqa 16(%esi), %xmm1 999 movdqa %xmm1, 
%xmm2 1000 palignr $9,(%esi), %xmm1 1001 pcmpeqb (%edi), %xmm1 1002 1003 movdqa 32(%esi), %xmm3 1004 palignr $9,%xmm2, %xmm3 1005 pcmpeqb 16(%edi), %xmm3 1006 1007 pand %xmm1, %xmm3 1008 pmovmskb %xmm3, %edx 1009 lea 32(%edi), %edi 1010 lea 32(%esi), %esi 1011 sub $0xffff, %edx 1012 jnz L(exit) 1013 lea (%ecx, %edi,1), %eax 1014 lea 9(%ecx, %esi,1), %edx 1015 POP (%edi) 1016 POP (%esi) 1017 jmp L(less48bytes) 1018 1019 cfi_restore_state 1020 cfi_remember_state 1021 .p2align 4 1022L(shr_9_gobble): 1023 sub $32, %ecx 1024 movdqa 16(%esi), %xmm0 1025 palignr $9,(%esi), %xmm0 1026 pcmpeqb (%edi), %xmm0 1027 1028 movdqa 32(%esi), %xmm3 1029 palignr $9,16(%esi), %xmm3 1030 pcmpeqb 16(%edi), %xmm3 1031 1032L(shr_9_gobble_loop): 1033 pand %xmm0, %xmm3 1034 sub $32, %ecx 1035 pmovmskb %xmm3, %edx 1036 movdqa %xmm0, %xmm1 1037 1038 movdqa 64(%esi), %xmm3 1039 palignr $9,48(%esi), %xmm3 1040 sbb $0xffff, %edx 1041 movdqa 48(%esi), %xmm0 1042 palignr $9,32(%esi), %xmm0 1043 pcmpeqb 32(%edi), %xmm0 1044 lea 32(%esi), %esi 1045 pcmpeqb 48(%edi), %xmm3 1046 1047 lea 32(%edi), %edi 1048 jz L(shr_9_gobble_loop) 1049 pand %xmm0, %xmm3 1050 1051 cmp $0, %ecx 1052 jge L(shr_9_gobble_next) 1053 inc %edx 1054 add $32, %ecx 1055L(shr_9_gobble_next): 1056 test %edx, %edx 1057 jnz L(exit) 1058 1059 pmovmskb %xmm3, %edx 1060 movdqa %xmm0, %xmm1 1061 lea 32(%edi), %edi 1062 lea 32(%esi), %esi 1063 sub $0xffff, %edx 1064 jnz L(exit) 1065 1066 lea (%ecx, %edi,1), %eax 1067 lea 9(%ecx, %esi,1), %edx 1068 POP (%edi) 1069 POP (%esi) 1070 jmp L(less48bytes) 1071#endif 1072 1073#if !defined(USE_WCHAR) 1074 cfi_restore_state 1075 cfi_remember_state 1076 .p2align 4 1077L(shr_10): 1078 cmp $80, %ecx 1079 lea -48(%ecx), %ecx 1080 mov %edx, %eax 1081 jae L(shr_10_gobble) 1082 1083 movdqa 16(%esi), %xmm1 1084 movdqa %xmm1, %xmm2 1085 palignr $10, (%esi), %xmm1 1086 pcmpeqb (%edi), %xmm1 1087 1088 movdqa 32(%esi), %xmm3 1089 palignr $10,%xmm2, %xmm3 1090 pcmpeqb 16(%edi), %xmm3 1091 1092 pand %xmm1, %xmm3 
1093 pmovmskb %xmm3, %edx 1094 lea 32(%edi), %edi 1095 lea 32(%esi), %esi 1096 sub $0xffff, %edx 1097 jnz L(exit) 1098 lea (%ecx, %edi,1), %eax 1099 lea 10(%ecx, %esi,1), %edx 1100 POP (%edi) 1101 POP (%esi) 1102 jmp L(less48bytes) 1103 1104 cfi_restore_state 1105 cfi_remember_state 1106 .p2align 4 1107L(shr_10_gobble): 1108 sub $32, %ecx 1109 movdqa 16(%esi), %xmm0 1110 palignr $10, (%esi), %xmm0 1111 pcmpeqb (%edi), %xmm0 1112 1113 movdqa 32(%esi), %xmm3 1114 palignr $10, 16(%esi), %xmm3 1115 pcmpeqb 16(%edi), %xmm3 1116 1117L(shr_10_gobble_loop): 1118 pand %xmm0, %xmm3 1119 sub $32, %ecx 1120 pmovmskb %xmm3, %edx 1121 movdqa %xmm0, %xmm1 1122 1123 movdqa 64(%esi), %xmm3 1124 palignr $10,48(%esi), %xmm3 1125 sbb $0xffff, %edx 1126 movdqa 48(%esi), %xmm0 1127 palignr $10,32(%esi), %xmm0 1128 pcmpeqb 32(%edi), %xmm0 1129 lea 32(%esi), %esi 1130 pcmpeqb 48(%edi), %xmm3 1131 1132 lea 32(%edi), %edi 1133 jz L(shr_10_gobble_loop) 1134 pand %xmm0, %xmm3 1135 1136 cmp $0, %ecx 1137 jge L(shr_10_gobble_next) 1138 inc %edx 1139 add $32, %ecx 1140L(shr_10_gobble_next): 1141 test %edx, %edx 1142 jnz L(exit) 1143 1144 pmovmskb %xmm3, %edx 1145 movdqa %xmm0, %xmm1 1146 lea 32(%edi), %edi 1147 lea 32(%esi), %esi 1148 sub $0xffff, %edx 1149 jnz L(exit) 1150 1151 lea (%ecx, %edi,1), %eax 1152 lea 10(%ecx, %esi,1), %edx 1153 POP (%edi) 1154 POP (%esi) 1155 jmp L(less48bytes) 1156#endif 1157 1158#if !defined(USE_WCHAR) && !defined(USE_UTF16) 1159 cfi_restore_state 1160 cfi_remember_state 1161 .p2align 4 1162L(shr_11): 1163 cmp $80, %ecx 1164 lea -48(%ecx), %ecx 1165 mov %edx, %eax 1166 jae L(shr_11_gobble) 1167 1168 movdqa 16(%esi), %xmm1 1169 movdqa %xmm1, %xmm2 1170 palignr $11, (%esi), %xmm1 1171 pcmpeqb (%edi), %xmm1 1172 1173 movdqa 32(%esi), %xmm3 1174 palignr $11, %xmm2, %xmm3 1175 pcmpeqb 16(%edi), %xmm3 1176 1177 pand %xmm1, %xmm3 1178 pmovmskb %xmm3, %edx 1179 lea 32(%edi), %edi 1180 lea 32(%esi), %esi 1181 sub $0xffff, %edx 1182 jnz L(exit) 1183 lea (%ecx, %edi,1), %eax 
1184 lea 11(%ecx, %esi,1), %edx 1185 POP (%edi) 1186 POP (%esi) 1187 jmp L(less48bytes) 1188 1189 cfi_restore_state 1190 cfi_remember_state 1191 .p2align 4 1192L(shr_11_gobble): 1193 sub $32, %ecx 1194 movdqa 16(%esi), %xmm0 1195 palignr $11, (%esi), %xmm0 1196 pcmpeqb (%edi), %xmm0 1197 1198 movdqa 32(%esi), %xmm3 1199 palignr $11, 16(%esi), %xmm3 1200 pcmpeqb 16(%edi), %xmm3 1201 1202L(shr_11_gobble_loop): 1203 pand %xmm0, %xmm3 1204 sub $32, %ecx 1205 pmovmskb %xmm3, %edx 1206 movdqa %xmm0, %xmm1 1207 1208 movdqa 64(%esi), %xmm3 1209 palignr $11,48(%esi), %xmm3 1210 sbb $0xffff, %edx 1211 movdqa 48(%esi), %xmm0 1212 palignr $11,32(%esi), %xmm0 1213 pcmpeqb 32(%edi), %xmm0 1214 lea 32(%esi), %esi 1215 pcmpeqb 48(%edi), %xmm3 1216 1217 lea 32(%edi), %edi 1218 jz L(shr_11_gobble_loop) 1219 pand %xmm0, %xmm3 1220 1221 cmp $0, %ecx 1222 jge L(shr_11_gobble_next) 1223 inc %edx 1224 add $32, %ecx 1225L(shr_11_gobble_next): 1226 test %edx, %edx 1227 jnz L(exit) 1228 1229 pmovmskb %xmm3, %edx 1230 movdqa %xmm0, %xmm1 1231 lea 32(%edi), %edi 1232 lea 32(%esi), %esi 1233 sub $0xffff, %edx 1234 jnz L(exit) 1235 1236 lea (%ecx, %edi,1), %eax 1237 lea 11(%ecx, %esi,1), %edx 1238 POP (%edi) 1239 POP (%esi) 1240 jmp L(less48bytes) 1241#endif 1242 1243 cfi_restore_state 1244 cfi_remember_state 1245 .p2align 4 1246L(shr_12): 1247 cmp $80, %ecx 1248 lea -48(%ecx), %ecx 1249 mov %edx, %eax 1250 jae L(shr_12_gobble) 1251 1252 movdqa 16(%esi), %xmm1 1253 movdqa %xmm1, %xmm2 1254 palignr $12, (%esi), %xmm1 1255 pcmpeqb (%edi), %xmm1 1256 1257 movdqa 32(%esi), %xmm3 1258 palignr $12, %xmm2, %xmm3 1259 pcmpeqb 16(%edi), %xmm3 1260 1261 pand %xmm1, %xmm3 1262 pmovmskb %xmm3, %edx 1263 lea 32(%edi), %edi 1264 lea 32(%esi), %esi 1265 sub $0xffff, %edx 1266 jnz L(exit) 1267 lea (%ecx, %edi,1), %eax 1268 lea 12(%ecx, %esi,1), %edx 1269 POP (%edi) 1270 POP (%esi) 1271 jmp L(less48bytes) 1272 1273 cfi_restore_state 1274 cfi_remember_state 1275 .p2align 4 1276L(shr_12_gobble): 1277 sub $32, 
%ecx 1278 movdqa 16(%esi), %xmm0 1279 palignr $12, (%esi), %xmm0 1280 pcmpeqb (%edi), %xmm0 1281 1282 movdqa 32(%esi), %xmm3 1283 palignr $12, 16(%esi), %xmm3 1284 pcmpeqb 16(%edi), %xmm3 1285 1286L(shr_12_gobble_loop): 1287 pand %xmm0, %xmm3 1288 sub $32, %ecx 1289 pmovmskb %xmm3, %edx 1290 movdqa %xmm0, %xmm1 1291 1292 movdqa 64(%esi), %xmm3 1293 palignr $12,48(%esi), %xmm3 1294 sbb $0xffff, %edx 1295 movdqa 48(%esi), %xmm0 1296 palignr $12,32(%esi), %xmm0 1297 pcmpeqb 32(%edi), %xmm0 1298 lea 32(%esi), %esi 1299 pcmpeqb 48(%edi), %xmm3 1300 1301 lea 32(%edi), %edi 1302 jz L(shr_12_gobble_loop) 1303 pand %xmm0, %xmm3 1304 1305 cmp $0, %ecx 1306 jge L(shr_12_gobble_next) 1307 inc %edx 1308 add $32, %ecx 1309L(shr_12_gobble_next): 1310 test %edx, %edx 1311 jnz L(exit) 1312 1313 pmovmskb %xmm3, %edx 1314 movdqa %xmm0, %xmm1 1315 lea 32(%edi), %edi 1316 lea 32(%esi), %esi 1317 sub $0xffff, %edx 1318 jnz L(exit) 1319 1320 lea (%ecx, %edi,1), %eax 1321 lea 12(%ecx, %esi,1), %edx 1322 POP (%edi) 1323 POP (%esi) 1324 jmp L(less48bytes) 1325 1326#if !defined(USE_WCHAR) && !defined(USE_UTF16) 1327 cfi_restore_state 1328 cfi_remember_state 1329 .p2align 4 1330L(shr_13): 1331 cmp $80, %ecx 1332 lea -48(%ecx), %ecx 1333 mov %edx, %eax 1334 jae L(shr_13_gobble) 1335 1336 movdqa 16(%esi), %xmm1 1337 movdqa %xmm1, %xmm2 1338 palignr $13, (%esi), %xmm1 1339 pcmpeqb (%edi), %xmm1 1340 1341 movdqa 32(%esi), %xmm3 1342 palignr $13, %xmm2, %xmm3 1343 pcmpeqb 16(%edi), %xmm3 1344 1345 pand %xmm1, %xmm3 1346 pmovmskb %xmm3, %edx 1347 lea 32(%edi), %edi 1348 lea 32(%esi), %esi 1349 sub $0xffff, %edx 1350 jnz L(exit) 1351 lea (%ecx, %edi,1), %eax 1352 lea 13(%ecx, %esi,1), %edx 1353 POP (%edi) 1354 POP (%esi) 1355 jmp L(less48bytes) 1356 1357 cfi_restore_state 1358 cfi_remember_state 1359 .p2align 4 1360L(shr_13_gobble): 1361 sub $32, %ecx 1362 movdqa 16(%esi), %xmm0 1363 palignr $13, (%esi), %xmm0 1364 pcmpeqb (%edi), %xmm0 1365 1366 movdqa 32(%esi), %xmm3 1367 palignr $13, 16(%esi), 
%xmm3 1368 pcmpeqb 16(%edi), %xmm3 1369 1370L(shr_13_gobble_loop): 1371 pand %xmm0, %xmm3 1372 sub $32, %ecx 1373 pmovmskb %xmm3, %edx 1374 movdqa %xmm0, %xmm1 1375 1376 movdqa 64(%esi), %xmm3 1377 palignr $13,48(%esi), %xmm3 1378 sbb $0xffff, %edx 1379 movdqa 48(%esi), %xmm0 1380 palignr $13,32(%esi), %xmm0 1381 pcmpeqb 32(%edi), %xmm0 1382 lea 32(%esi), %esi 1383 pcmpeqb 48(%edi), %xmm3 1384 1385 lea 32(%edi), %edi 1386 jz L(shr_13_gobble_loop) 1387 pand %xmm0, %xmm3 1388 1389 cmp $0, %ecx 1390 jge L(shr_13_gobble_next) 1391 inc %edx 1392 add $32, %ecx 1393L(shr_13_gobble_next): 1394 test %edx, %edx 1395 jnz L(exit) 1396 1397 pmovmskb %xmm3, %edx 1398 movdqa %xmm0, %xmm1 1399 lea 32(%edi), %edi 1400 lea 32(%esi), %esi 1401 sub $0xffff, %edx 1402 jnz L(exit) 1403 1404 lea (%ecx, %edi,1), %eax 1405 lea 13(%ecx, %esi,1), %edx 1406 POP (%edi) 1407 POP (%esi) 1408 jmp L(less48bytes) 1409#endif 1410 1411#if !defined(USE_WCHAR) 1412 cfi_restore_state 1413 cfi_remember_state 1414 .p2align 4 1415L(shr_14): 1416 cmp $80, %ecx 1417 lea -48(%ecx), %ecx 1418 mov %edx, %eax 1419 jae L(shr_14_gobble) 1420 1421 movdqa 16(%esi), %xmm1 1422 movdqa %xmm1, %xmm2 1423 palignr $14, (%esi), %xmm1 1424 pcmpeqb (%edi), %xmm1 1425 1426 movdqa 32(%esi), %xmm3 1427 palignr $14, %xmm2, %xmm3 1428 pcmpeqb 16(%edi), %xmm3 1429 1430 pand %xmm1, %xmm3 1431 pmovmskb %xmm3, %edx 1432 lea 32(%edi), %edi 1433 lea 32(%esi), %esi 1434 sub $0xffff, %edx 1435 jnz L(exit) 1436 lea (%ecx, %edi,1), %eax 1437 lea 14(%ecx, %esi,1), %edx 1438 POP (%edi) 1439 POP (%esi) 1440 jmp L(less48bytes) 1441 1442 cfi_restore_state 1443 cfi_remember_state 1444 .p2align 4 1445L(shr_14_gobble): 1446 sub $32, %ecx 1447 movdqa 16(%esi), %xmm0 1448 palignr $14, (%esi), %xmm0 1449 pcmpeqb (%edi), %xmm0 1450 1451 movdqa 32(%esi), %xmm3 1452 palignr $14, 16(%esi), %xmm3 1453 pcmpeqb 16(%edi), %xmm3 1454 1455L(shr_14_gobble_loop): 1456 pand %xmm0, %xmm3 1457 sub $32, %ecx 1458 pmovmskb %xmm3, %edx 1459 movdqa %xmm0, %xmm1 1460 
/* Remainder of L(shr_14_gobble_loop).  The head of this loop is above this
   point; the statements below are reproduced unchanged, one per line.  */
	movdqa	64(%esi), %xmm3
	palignr	$14, 48(%esi), %xmm3
	sbb	$0xffff, %edx
	movdqa	48(%esi), %xmm0
	palignr	$14, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_14_gobble_loop)
	pand	%xmm0, %xmm3

	cmp	$0, %ecx
	jge	L(shr_14_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_14_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	14(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
#endif

#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cfi_restore_state
	cfi_remember_state
	.p2align 4
/* L(shr_15): compare 32 bytes per step.  Each 16-byte block of the %esi
   stream is rebuilt from two aligned loads with palignr $15 and compared
   with the block at %edi.
   %edx is copied to %eax on entry; L(first16bytes) later adds %eax to %esi.
   NOTE(review): presumably this is the offset removed when %esi was
   aligned down - confirm against the code above this chunk.  */
L(shr_15):
	cmp	$80, %ecx
	lea	-48(%ecx), %ecx		/* lea keeps the flags of the cmp */
	mov	%edx, %eax
	jae	L(shr_15_gobble)	/* count was at least 80 */

	/* First 16 bytes; %xmm2 keeps the raw middle block for reuse.  */
	movdqa	16(%esi), %xmm1
	movdqa	%xmm1, %xmm2
	palignr	$15, (%esi), %xmm1
	pcmpeqb	(%edi), %xmm1

	/* Second 16 bytes.  */
	movdqa	32(%esi), %xmm3
	palignr	$15, %xmm2, %xmm3
	pcmpeqb	16(%edi), %xmm3

	/* %edx becomes zero only when all 32 bytes compared equal.  */
	pand	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)
	/* Hand the rest to L(less48bytes): %eax = %edi + %ecx and
	   %edx = %esi + %ecx + 15; the code there reads at negative
	   displacements from both.  */
	lea	(%ecx, %edi,1), %eax
	lea	15(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)

	cfi_restore_state
	cfi_remember_state
	.p2align 4
/* Long-count variant: the compares for the next 32 bytes are issued while
   the mask of the previous 32 bytes is being evaluated.  */
L(shr_15_gobble):
	sub	$32, %ecx
	movdqa	16(%esi), %xmm0
	palignr	$15, (%esi), %xmm0
	pcmpeqb	(%edi), %xmm0

	movdqa	32(%esi), %xmm3
	palignr	$15, 16(%esi), %xmm3
	pcmpeqb	16(%edi), %xmm3

L(shr_15_gobble_loop):
	pand	%xmm0, %xmm3
	sub	$32, %ecx		/* the borrow feeds the sbb below */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1		/* first-half result, used by L(exit) */

	movdqa	64(%esi), %xmm3
	palignr	$15, 48(%esi), %xmm3
	/* Zero only if all 32 bytes matched and the sub did not borrow.
	   No instruction up to the jz below changes the flags.  */
	sbb	$0xffff, %edx
	movdqa	48(%esi), %xmm0
	palignr	$15, 32(%esi), %xmm0
	pcmpeqb	32(%edi), %xmm0
	lea	32(%esi), %esi
	pcmpeqb	48(%edi), %xmm3

	lea	32(%edi), %edi
	jz	L(shr_15_gobble_loop)
	pand	%xmm0, %xmm3

	/* The loop ended on a difference or on the borrow.  With %ecx
	   negative the borrow is taken back out of %edx and %ecx gets its
	   32 back.  */
	cmp	$0, %ecx
	jge	L(shr_15_gobble_next)
	inc	%edx
	add	$32, %ecx
L(shr_15_gobble_next):
	test	%edx, %edx
	jnz	L(exit)

	/* Evaluate the 32 bytes whose compares were already issued.  */
	pmovmskb %xmm3, %edx
	movdqa	%xmm0, %xmm1
	lea	32(%edi), %edi
	lea	32(%esi), %esi
	sub	$0xffff, %edx
	jnz	L(exit)

	lea	(%ecx, %edi,1), %eax
	lea	15(%ecx, %esi,1), %edx
	POP (%edi)
	POP (%esi)
	jmp	L(less48bytes)
#endif

	cfi_restore_state
	cfi_remember_state
	.p2align 4
/* L(exit): on the paths visible here a difference lies in the 32 bytes just
   below %edi/%esi and %xmm1 holds the equality result of their first half.
   If that half is entirely equal, the difference is in the second half and
   the pointers are already right for the -16..-1 displacements used below.
   Otherwise both pointers step back 16 and %edx takes the first half's
   value.  */
L(exit):
	pmovmskb %xmm1, %ebx
	sub	$0xffff, %ebx
	jz	L(first16bytes)
	lea	-16(%esi), %esi
	lea	-16(%edi), %edi
	mov	%ebx, %edx

L(first16bytes):
	add	%eax, %esi
/* L(less16bytes): %dx is an equality mask minus 0xffff.  That subtraction
   negates the inequality mask and so keeps its lowest set bit, which marks
   the first differing byte; the bits are tested from bit 0 upwards.  */
L(less16bytes):

#if !defined(USE_WCHAR) && !defined(USE_UTF16)
/* Return the difference of the zero-extended bytes at displacement OFF
   from %edi and %esi.  */
#define RETURN_BYTE_DIFF(OFF)		\
	movzbl	OFF(%edi), %eax;	\
	movzbl	OFF(%esi), %edx;	\
	sub	%edx, %eax;		\
	RETURN

	test	%dl, %dl
	jz	L(next_24_bytes)

	test	$0x01, %dl
	jnz	L(Byte16)

	test	$0x02, %dl
	jnz	L(Byte17)

	test	$0x04, %dl
	jnz	L(Byte18)

	test	$0x08, %dl
	jnz	L(Byte19)

	test	$0x10, %dl
	jnz	L(Byte20)

	test	$0x20, %dl
	jnz	L(Byte21)

	test	$0x40, %dl
	jnz	L(Byte22)
L(Byte23):
	RETURN_BYTE_DIFF (-9)

	.p2align 4
L(Byte16):
	RETURN_BYTE_DIFF (-16)

	.p2align 4
L(Byte17):
	RETURN_BYTE_DIFF (-15)

	.p2align 4
L(Byte18):
	RETURN_BYTE_DIFF (-14)

	.p2align 4
L(Byte19):
	RETURN_BYTE_DIFF (-13)

	.p2align 4
L(Byte20):
	RETURN_BYTE_DIFF (-12)

	.p2align 4
L(Byte21):
	RETURN_BYTE_DIFF (-11)

	.p2align 4
L(Byte22):
	RETURN_BYTE_DIFF (-10)
#undef RETURN_BYTE_DIFF

	.p2align 4
/* Upper eight bytes: advance both pointers by 8 so the L(ByteNN) returns
   above can be shared.  */
L(next_24_bytes):
	lea	8(%edi), %edi
	lea	8(%esi), %esi
	test	$0x01, %dh
	jnz	L(Byte16)

	test	$0x02, %dh
	jnz	L(Byte17)

	test	$0x04, %dh
	jnz	L(Byte18)

	test	$0x08, %dh
	jnz	L(Byte19)

	test	$0x10, %dh
	jnz	L(Byte20)

	test	$0x20, %dh
	jnz	L(Byte21)

	test	$0x40, %dh
	jnz	L(Byte22)

	.p2align 4
L(Byte31):
	movzbl	-9(%edi), %eax
	movzbl	-9(%esi), %edx
	sub	%edx, %eax
	/* RETURN_END, not RETURN: the code that follows the selector
	   starts from the popped CFI state.  */
	RETURN_END
#elif defined(USE_AS_WMEMCMP)

/* special for wmemcmp */
/* Signed compare of the 32-bit elements at displacement OFF; returns 1 via
   L(nequal_bigger) when the %edi element is greater, otherwise -1.  mov
   leaves the flags of the cmp intact for the jg.  */
#define RETURN_WCHAR_ORDER(OFF)		\
	mov	OFF(%edi), %ecx;	\
	cmp	OFF(%esi), %ecx;	\
	mov	$1, %eax;		\
	jg	L(nequal_bigger);	\
	neg	%eax;			\
	RETURN

	test	%dl, %dl
	jz	L(next_two_double_words)
	and	$15, %dl
	jz	L(second_double_word)
	RETURN_WCHAR_ORDER (-16)

	.p2align 4
L(second_double_word):
	RETURN_WCHAR_ORDER (-12)

	.p2align 4
L(next_two_double_words):
	and	$15, %dh
	jz	L(fourth_double_word)
	RETURN_WCHAR_ORDER (-8)

	.p2align 4
L(fourth_double_word):
	RETURN_WCHAR_ORDER (-4)
#undef RETURN_WCHAR_ORDER

	.p2align 4
L(nequal_bigger):
	RETURN_END

#elif defined(USE_AS_MEMCMP16)

/* special for __memcmp16 */
	/* Two mask bits per 16-bit element; the result is the difference
	   of the zero-extended elements.  */
	test	%dl, %dl
	jz	L(next_four_words)
	test	$15, %dl
	jz	L(second_two_words)
	test	$3, %dl
	jz	L(second_word)
	movzwl	-16(%edi), %eax
	movzwl	-16(%esi), %ebx
	subl	%ebx, %eax
	RETURN

/* Remaining cases of the __memcmp16 selector.  Each returns the difference
   of the zero-extended 16-bit elements at the given displacement from %edi
   and %esi.  %dl / %dh are tested from the low bits upwards, two bits per
   element.  */
	.p2align 4
L(second_word):
	movzwl	-14(%edi), %eax
	movzwl	-14(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(second_two_words):
	test	$63, %dl
	jz	L(fourth_word)
	movzwl	-12(%edi), %eax
	movzwl	-12(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(fourth_word):
	movzwl	-10(%edi), %eax
	movzwl	-10(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(next_four_words):
	test	$15, %dh
	jz	L(fourth_two_words)
	test	$3, %dh
	jz	L(sixth_word)
	movzwl	-8(%edi), %eax
	movzwl	-8(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(sixth_word):
	movzwl	-6(%edi), %eax
	movzwl	-6(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(fourth_two_words):
	test	$63, %dh
	jz	L(eighth_word)
	movzwl	-4(%edi), %eax
	movzwl	-4(%esi), %ebx
	subl	%ebx, %eax
	RETURN

	.p2align 4
L(eighth_word):
	movzwl	-2(%edi), %eax
	movzwl	-2(%esi), %ebx
	subl	%ebx, %eax
	/* RETURN_END rather than RETURN.  RETURN would restore the CFI
	   state with %edi, %esi and %ebx saved, and the CFI_PUSH (%ebx)
	   below would then describe four saved words.  The code after it
	   has only %ebx on the stack.  The byte and wchar_t branches of
	   this selector also end in RETURN_END.  The emitted instructions
	   are identical; only the unwind information changes.  */
	RETURN_END
#else
# error Unreachable preprocessor case
#endif

	/* From here on only %ebx is saved: every path below ends in
	   POP (%ebx); ret.  */
	CFI_PUSH (%ebx)

/* Length dispatch for 8..39 remaining bytes in %ecx.  Each stage hands
   larger counts to the next stage, then selects the L(Nbytes) entry for the
   exact count.  The last candidate of every stage is taken without a
   compare.  */
	.p2align 4
L(more8bytes):
	cmp	$16, %ecx
	jae	L(more16bytes)
	cmp	$8, %ecx
	je	L(8bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$9, %ecx
	je	L(9bytes)
	cmp	$10, %ecx
	je	L(10bytes)
	cmp	$11, %ecx
	je	L(11bytes)
	cmp	$12, %ecx
	je	L(12bytes)
	cmp	$13, %ecx
	je	L(13bytes)
	cmp	$14, %ecx
	je	L(14bytes)
	jmp	L(15bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(12bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$10, %ecx
	je	L(10bytes)
	cmp	$12, %ecx
	je	L(12bytes)
	jmp	L(14bytes)
#else
# error Unreachable preprocessor case
#endif

	.p2align 4
L(more16bytes):
	cmp	$24, %ecx
	jae	L(more24bytes)
	cmp	$16, %ecx
	je	L(16bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$17, %ecx
	je	L(17bytes)
	cmp	$18, %ecx
	je	L(18bytes)
	cmp	$19, %ecx
	je	L(19bytes)
	cmp	$20, %ecx
	je	L(20bytes)
	cmp	$21, %ecx
	je	L(21bytes)
	cmp	$22, %ecx
	je	L(22bytes)
	jmp	L(23bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(20bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$18, %ecx
	je	L(18bytes)
	cmp	$20, %ecx
	je	L(20bytes)
	jmp	L(22bytes)
#else
# error Unreachable preprocessor case
#endif

	.p2align 4
L(more24bytes):
	cmp	$32, %ecx
	jae	L(more32bytes)
	cmp	$24, %ecx
	je	L(24bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$25, %ecx
	je	L(25bytes)
	cmp	$26, %ecx
	je	L(26bytes)
	cmp	$27, %ecx
	je	L(27bytes)
	cmp	$28, %ecx
	je	L(28bytes)
	cmp	$29, %ecx
	je	L(29bytes)
	cmp	$30, %ecx
	je	L(30bytes)
	jmp	L(31bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(28bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$26, %ecx
	je	L(26bytes)
	cmp	$28, %ecx
	je	L(28bytes)
	jmp	L(30bytes)
#else
# error Unreachable preprocessor case
#endif

	.p2align 4
L(more32bytes):
	cmp	$40, %ecx
	jae	L(more40bytes)
	cmp	$32, %ecx
	je	L(32bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$33, %ecx
	je	L(33bytes)
	cmp	$34, %ecx
	je	L(34bytes)
	cmp	$35, %ecx
	je	L(35bytes)
	cmp	$36, %ecx
	je	L(36bytes)
	cmp	$37, %ecx
	je	L(37bytes)
	cmp	$38, %ecx
	je	L(38bytes)
	jmp	L(39bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(36bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$34, %ecx
	je	L(34bytes)
	cmp	$36, %ecx
	je	L(36bytes)
	jmp	L(38bytes)
#else
# error Unreachable preprocessor case
#endif

/* L(less48bytes): %ecx holds the remaining byte count and %eax / %edx point
   just past the remaining data of the two buffers; the L(Nbytes) entries
   read at negative displacements from them.  Counts of 8 and more go
   through L(more8bytes).  */
	.p2align 4
L(less48bytes):
	cmp	$8, %ecx
	jae	L(more8bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	/* Any count below 8 other than 2..6 ends up in L(7bytes).
	   NOTE(review): counts 0 and 1 are presumably excluded by the
	   callers - confirm.  */
	cmp	$2, %ecx
	je	L(2bytes)
	cmp	$3, %ecx
	je	L(3bytes)
	cmp	$4, %ecx
	je	L(4bytes)
	cmp	$5, %ecx
	je	L(5bytes)
	cmp	$6, %ecx
	je	L(6bytes)
	jmp	L(7bytes)
#elif defined(USE_WCHAR) && !defined(USE_UTF16)
	jmp	L(4bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$2, %ecx
	je	L(2bytes)
	cmp	$4, %ecx
	je	L(4bytes)
	jmp	L(6bytes)
#else
# error Unreachable preprocessor case
#endif

	.p2align 4
/* Counts of 40 and more.  With USE_WCHAR defined and USE_UTF16 undefined
   neither branch below is assembled, so a count other than 40 falls through
   into the code that follows (L(44bytes)).  */
L(more40bytes):
	cmp	$40, %ecx
	je	L(40bytes)
#if !defined(USE_WCHAR) && !defined(USE_UTF16)
	cmp	$41, %ecx
	je	L(41bytes)
	cmp	$42, %ecx
	je	L(42bytes)
	cmp	$43, %ecx
	je	L(43bytes)
	cmp	$44, %ecx
	je	L(44bytes)
	cmp	$45, %ecx
	je	L(45bytes)
	cmp	$46, %ecx
	je	L(46bytes)
	jmp	L(47bytes)
#elif defined(USE_UTF16) && !defined(USE_WCHAR)
	cmp	$42, %ecx
	je	L(42bytes)
	cmp	$44, %ecx
	je	L(44bytes)
	jmp	L(46bytes)
#endif

#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
/* One rung of the byte ladder: compare the dwords at displacement OFF and
   leave through L(find_diff) with both dwords still in %ecx / %ebx.  */
#define CMP_DWORD_STEP(OFF)		\
	mov	OFF(%eax), %ecx;	\
	mov	OFF(%edx), %ebx;	\
	cmp	%ebx, %ecx;		\
	jne	L(find_diff)

	.p2align 4
L(44bytes):
	CMP_DWORD_STEP (-44)
L(40bytes):
	CMP_DWORD_STEP (-40)
L(36bytes):
	CMP_DWORD_STEP (-36)
L(32bytes):
	CMP_DWORD_STEP (-32)
L(28bytes):
	CMP_DWORD_STEP (-28)
L(24bytes):
	CMP_DWORD_STEP (-24)
L(20bytes):
	CMP_DWORD_STEP (-20)
L(16bytes):
	CMP_DWORD_STEP (-16)
L(12bytes):
	CMP_DWORD_STEP (-12)
L(8bytes):
	CMP_DWORD_STEP (-8)
#undef CMP_DWORD_STEP
L(4bytes):
	mov	-4(%eax), %ecx
	mov	-4(%edx), %ebx
	cmp	%ebx, %ecx
	mov	$0, %eax		/* result 0; mov keeps the cmp flags */
	jne	L(find_diff)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)
#elif defined(USE_AS_WMEMCMP)
/* One rung of the wchar_t ladder; L(find_diff) consumes the flags of the
   cmp.  */
#define CMP_WCHAR_STEP(OFF)		\
	mov	OFF(%eax), %ecx;	\
	cmp	OFF(%edx), %ecx;	\
	jne	L(find_diff)

	.p2align 4
L(44bytes):
	CMP_WCHAR_STEP (-44)
L(40bytes):
	CMP_WCHAR_STEP (-40)
L(36bytes):
	CMP_WCHAR_STEP (-36)
L(32bytes):
	CMP_WCHAR_STEP (-32)
L(28bytes):
	CMP_WCHAR_STEP (-28)
L(24bytes):
	CMP_WCHAR_STEP (-24)
L(20bytes):
	CMP_WCHAR_STEP (-20)
L(16bytes):
	CMP_WCHAR_STEP (-16)
L(12bytes):
	CMP_WCHAR_STEP (-12)
L(8bytes):
	CMP_WCHAR_STEP (-8)
#undef CMP_WCHAR_STEP
L(4bytes):
	mov	-4(%eax), %ecx
	xor	%eax, %eax		/* result 0, set before the cmp */
	cmp	-4(%edx), %ecx
	jne	L(find_diff)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)
#elif defined USE_AS_MEMCMP16
/* One rung of the 16-bit ladder: %ecx receives the difference of the
   zero-extended elements and is returned by L(memcmp16_exit) when it is
   not zero.  */
#define CMP_WORD_STEP(OFF)		\
	movzwl	OFF(%eax), %ecx;	\
	movzwl	OFF(%edx), %ebx;	\
	subl	%ebx, %ecx;		\
	jne	L(memcmp16_exit)

	.p2align 4
L(46bytes):
	CMP_WORD_STEP (-46)
L(44bytes):
	CMP_WORD_STEP (-44)
L(42bytes):
	CMP_WORD_STEP (-42)
L(40bytes):
	CMP_WORD_STEP (-40)
L(38bytes):
	CMP_WORD_STEP (-38)
L(36bytes):
	CMP_WORD_STEP (-36)
L(34bytes):
	CMP_WORD_STEP (-34)
L(32bytes):
	CMP_WORD_STEP (-32)
L(30bytes):
	CMP_WORD_STEP (-30)
L(28bytes):
	CMP_WORD_STEP (-28)
L(26bytes):
	CMP_WORD_STEP (-26)
L(24bytes):
	CMP_WORD_STEP (-24)
L(22bytes):
	CMP_WORD_STEP (-22)
L(20bytes):
	CMP_WORD_STEP (-20)
L(18bytes):
	CMP_WORD_STEP (-18)
L(16bytes):
	CMP_WORD_STEP (-16)
L(14bytes):
	CMP_WORD_STEP (-14)
L(12bytes):
	CMP_WORD_STEP (-12)
L(10bytes):
	CMP_WORD_STEP (-10)
L(8bytes):
	CMP_WORD_STEP (-8)
#undef CMP_WORD_STEP
L(6bytes):
/* Body of L(6bytes) of the 16-bit ladder; the label itself is the last
   statement before this point.  */
	movzwl	-6(%eax), %ecx
	movzwl	-6(%edx), %ebx
	subl	%ebx, %ecx
	jne	L(memcmp16_exit)
L(4bytes):
	movzwl	-4(%eax), %ecx
	movzwl	-4(%edx), %ebx
	subl	%ebx, %ecx
	jne	L(memcmp16_exit)
L(2bytes):
	/* Last element: its difference is the return value.  */
	movzwl	-2(%eax), %eax
	movzwl	-2(%edx), %ebx
	subl	%ebx, %eax
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)
#else
# error Unreachable preprocessor case
#endif

#if !defined(USE_AS_WMEMCMP) && !defined(USE_AS_MEMCMP16)
/* Byte ladders for counts of the form 4k+1, 4k+2 and 4k+3.  One rung
   compares the dwords at displacement OFF from %eax and %edx and leaves
   through L(find_diff) with both dwords still in %ecx / %ebx.  */
#define TAIL_DWORD_STEP(OFF)		\
	mov	OFF(%eax), %ecx;	\
	mov	OFF(%edx), %ebx;	\
	cmp	%ebx, %ecx;		\
	jne	L(find_diff)

	.p2align 4
L(45bytes):
	TAIL_DWORD_STEP (-45)
L(41bytes):
	TAIL_DWORD_STEP (-41)
L(37bytes):
	TAIL_DWORD_STEP (-37)
L(33bytes):
	TAIL_DWORD_STEP (-33)
L(29bytes):
	TAIL_DWORD_STEP (-29)
L(25bytes):
	TAIL_DWORD_STEP (-25)
L(21bytes):
	TAIL_DWORD_STEP (-21)
L(17bytes):
	TAIL_DWORD_STEP (-17)
L(13bytes):
	TAIL_DWORD_STEP (-13)
L(9bytes):
	TAIL_DWORD_STEP (-9)
L(5bytes):
	TAIL_DWORD_STEP (-5)
	/* Final single byte.  */
	movzbl	-1(%eax), %ecx
	cmp	-1(%edx), %cl
	mov	$0, %eax		/* result 0; mov keeps the cmp flags */
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

	.p2align 4
L(46bytes):
	TAIL_DWORD_STEP (-46)
L(42bytes):
	TAIL_DWORD_STEP (-42)
L(38bytes):
	TAIL_DWORD_STEP (-38)
L(34bytes):
	TAIL_DWORD_STEP (-34)
L(30bytes):
	TAIL_DWORD_STEP (-30)
L(26bytes):
	TAIL_DWORD_STEP (-26)
L(22bytes):
	TAIL_DWORD_STEP (-22)
L(18bytes):
	TAIL_DWORD_STEP (-18)
L(14bytes):
	TAIL_DWORD_STEP (-14)
L(10bytes):
	TAIL_DWORD_STEP (-10)
L(6bytes):
	TAIL_DWORD_STEP (-6)
L(2bytes):
	/* Final two bytes, compared one at a time in memory order.  */
	movzwl	-2(%eax), %ecx
	movzwl	-2(%edx), %ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bh, %ch
	mov	$0, %eax		/* result 0; mov keeps the cmp flags */
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

	.p2align 4
L(47bytes):
	TAIL_DWORD_STEP (-47)
L(43bytes):
	TAIL_DWORD_STEP (-43)
L(39bytes):
	TAIL_DWORD_STEP (-39)
L(35bytes):
	TAIL_DWORD_STEP (-35)
L(31bytes):
	TAIL_DWORD_STEP (-31)
L(27bytes):
	TAIL_DWORD_STEP (-27)
#undef TAIL_DWORD_STEP
L(23bytes):
	/* The conditional jump of this rung follows this point.  */
	movl	-23(%eax), %ecx
	movl	-23(%edx), %ebx
	cmp	%ebx, %ecx
/* Completes L(23bytes): its two loads and the cmp precede this point.  */
	jne	L(find_diff)
L(19bytes):
	movl	-19(%eax), %ecx
	movl	-19(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(15bytes):
	movl	-15(%eax), %ecx
	movl	-15(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(11bytes):
	movl	-11(%eax), %ecx
	movl	-11(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(7bytes):
	movl	-7(%eax), %ecx
	movl	-7(%edx), %ebx
	cmp	%ebx, %ecx
	jne	L(find_diff)
L(3bytes):
	/* Final three bytes: two via a 16-bit load, then the last one.  */
	movzwl	-3(%eax), %ecx
	movzwl	-3(%edx), %ebx
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	movzbl	-1(%eax), %eax
	cmpb	-1(%edx), %al
	mov	$0, %eax		/* result 0; mov keeps the cmp flags */
	jne	L(end)
	POP (%ebx)
	ret
	CFI_PUSH (%ebx)

	.p2align 4
/* L(find_diff): %ecx and %ebx hold two dwords known to differ.  Locate the
   first differing byte in memory order: low byte, then the low word (whose
   order is decided by its high byte once the low bytes are equal), then
   the same for the upper halves.  The flags of the deciding compare are
   consumed by L(end).  */
L(find_diff):
	cmpb	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx
	jne	L(end)
	shr	$16,%ecx
	shr	$16,%ebx
	cmp	%bl, %cl
	jne	L(end)
	cmp	%bx, %cx

	.p2align 4
/* L(end): return 1 when the %eax-side value is above (unsigned) the
   %edx-side value, otherwise -1.  Neither the pop nor the mov changes the
   flags.  */
L(end):
	POP (%ebx)
	mov	$1, %eax
	ja	L(bigger)
	neg	%eax
L(bigger):
	ret
#elif defined(USE_AS_WMEMCMP)

	.p2align 4
/* wchar_t variant: the flags come from the signed element compare in the
   ladder; return 1 when greater, otherwise -1.  */
L(find_diff):
	POP (%ebx)
	mov	$1, %eax
	jg	L(find_diff_bigger)
	neg	%eax
	ret

	.p2align 4
L(find_diff_bigger):
	ret

#elif defined(USE_AS_MEMCMP16)

	.p2align 4
/* 16-bit variant: %ecx already holds the non-zero element difference.  */
L(memcmp16_exit):
	POP (%ebx)
	mov	%ecx, %eax
	ret
#else
# error Unreachable preprocessor case
#endif
END (MEMCMP)