/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * SSE2 string-length scan for 32-bit x86.  GNU as, AT&T syntax, run through
 * the C preprocessor.
 *
 * Stand-alone build: defines STRLEN (default name: strlen).
 *   In:    4(%esp)  pointer to the string
 *   Out:   %eax     number of bytes preceding the first zero byte
 *   Clobb: %eax, %ecx, %edx, %xmm0-%xmm3, %xmm6, flags
 *
 * Build-time variants:
 *   USE_AS_STRNLEN  A byte limit is read from the second stack argument and
 *                   the result is capped at that limit.  %edi carries the
 *                   remaining budget; it is pushed on entry and popped by
 *                   every RETURN.
 *   USE_AS_STRCAT   The macro prologue, ENTRY, the argument load and the
 *                   final RETURN/END are all left out, so only the scan
 *                   itself is emitted.  The scan reads through %edx without
 *                   loading it and uses L() and RETURN without defining
 *                   them, so the including file has to provide all three;
 *                   execution falls off the end after L(exit_tail15).
 *   USE_AS_STRLCAT  L(return_start_len) is not defined here and is
 *                   presumably supplied by the including file.
 *
 * Register roles during the scan:
 *   %eax  result while the first 16 bytes are checked, then scan pointer
 *   %ecx  string start + 16; turns the scan pointer back into a length
 *   %edx  string pointer at first, later the pmovmskb zero-byte mask
 *   %edi  (strnlen only) bytes of the limit not yet accounted for
 */

#ifndef USE_AS_STRCAT

# ifndef STRLEN
#  define STRLEN strlen
# endif

# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

/* A callee-saved register is required only for strnlen.  */

# ifdef USE_AS_STRNLEN
#  ifndef cfi_rel_offset
#   define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#  endif

#  ifndef cfi_restore
#   define cfi_restore(reg)	.cfi_restore reg
#  endif

#  ifndef cfi_adjust_cfa_offset
#   define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#  endif
# endif

# ifndef ENTRY
#  define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

# ifndef END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

/* Stack offset of the first argument at function entry.  */
# define PARMS	4
# define STR	PARMS
# define RETURN	ret

# ifdef USE_AS_STRNLEN
/* LEN is only used after PUSH (%edi), so it is the offset of the second
   argument once those 4 extra bytes are on the stack.  */
#  define LEN	PARMS + 8
#  define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#  define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

#  define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#  define POP(REG)	popl REG; CFI_POP (REG)
#  undef RETURN
/* Restore %edi and return.  The trailing CFI_PUSH re-establishes the unwind
   state for the code that follows the ret in the file.  */
#  define RETURN	POP (%edi); ret; CFI_PUSH(%edi);
# endif

	.text
ENTRY (STRLEN)
	mov	STR(%esp), %edx
# ifdef USE_AS_STRNLEN
	PUSH	(%edi)
	movl	LEN(%esp), %edi
	/* Limit of 4 or less: handled by a byte-wise path of its own.  */
	sub	$4, %edi
	jbe	L(len_less4_prolog)
# endif
#endif
	/* Check the first 16 bytes one at a time.  %eax stays 0 so the
	   exit_tailN stubs produce N directly.  For strnlen the budget in
	   %edi is reduced by 4 before each further group of four bytes.  */
	xor	%eax, %eax
	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)

#ifdef USE_AS_STRNLEN
	sub	$4, %edi
	jbe	L(len_less8_prolog)
#endif

	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)

#ifdef USE_AS_STRNLEN
	sub	$4, %edi
	jbe	L(len_less12_prolog)
#endif

	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)

#ifdef USE_AS_STRNLEN
	sub	$4, %edi
	jbe	L(len_less16_prolog)
#endif

	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)

	/* No zero in bytes 0..15.  Continue in 16-byte blocks:
	     %ecx = start + 16
	     %eax = (start + 16) rounded down to 16, i.e. the first 16-byte
		    boundary above start (bytes already checked may be
		    compared again, which is harmless).  */
	pxor	%xmm0, %xmm0
	lea	16(%edx), %eax
	mov	%eax, %ecx
	and	$-16, %eax

#ifdef USE_AS_STRNLEN
	/* %edi was limit - 16; adding (start & 15) makes it the number of
	   budget bytes left counting from %eax.  Each group of four blocks
	   below first takes 64 from it; 64 or fewer left goes to the
	   bounded tail.  */
	and	$15, %edx
	add	%edx, %edi
	sub	$64, %edi
	jbe	L(len_less64)
#endif

	/* Sixteen unrolled block checks.  Each pcmpeqb compares against a
	   register that is all-zero (freshly cleared, or left all-zero by
	   an earlier compare that matched nothing).  %eax is advanced
	   before the branch, so at L(exit) it points 16 past the block
	   whose mask is in %edx.  */
	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNLEN
	sub	$64, %edi
	jbe	L(len_less64)
#endif

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNLEN
	sub	$64, %edi
	jbe	L(len_less64)
#endif

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNLEN
	sub	$64, %edi
	jbe	L(len_less64)
#endif

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm2
	pmovmskb	%xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

#ifdef USE_AS_STRNLEN
	/* %eax is about to be rounded down to a 64-byte boundary; give the
	   bytes that get scanned a second time back to the budget.  */
	mov	%eax, %edx
	and	$63, %edx
	add	%edx, %edi
#endif

	and	$-0x40, %eax

	/* Main loop: 64 bytes per iteration.  The unsigned byte-wise
	   minimum of the four blocks has a zero byte iff one of the blocks
	   has one.  %xmm3 is all-zero here: every compare that wrote it so
	   far matched nothing.  */
	.p2align 4
L(aligned_64_loop):
#ifdef USE_AS_STRNLEN
	sub	$64, %edi
	jbe	L(len_less64)
#endif
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqb	%xmm3, %xmm2
	pmovmskb	%xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/* A zero byte is somewhere in the 64 bytes that end at %eax; find
	   the block.  %xmm0 and %xmm2 were overwritten by pminub, so blocks
	   0 and 2 are compared from memory; %xmm1 and %xmm6 still hold
	   blocks 1 and 3.  L(exit) computes %eax - %ecx and expects the
	   offset of the matching block, so %ecx is moved instead of %eax:
	   +48 for block 0, then 16 less for each later block.  */
	pcmpeqb	-64(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	%xmm1, %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqb	-32(%eax), %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* Not in blocks 0..2, so it is in block 3; fall into L(exit).  */
	pcmpeqb	%xmm6, %xmm3
	pmovmskb	%xmm3, %edx
	lea	-16(%ecx), %ecx

	/* In:  %edx = non-zero 16-bit mask, bit i set iff byte i of the
		       block is zero
		%eax - %ecx = offset of that block from the string start
	   Out: %eax = offset of the block + index of the lowest set bit.
	   The mask is narrowed byte -> nibble -> bit; the last bit of each
	   nibble needs no test because it is the only candidate left.  */
L(exit):
	sub	%ecx, %eax
	test	%dl, %dl
	jz	L(exit_high)

	mov	%dl, %cl
	and	$15, %cl
	jz	L(exit_8)
	test	$0x01, %dl
	jnz	L(exit_tail0)
	test	$0x02, %dl
	jnz	L(exit_tail1)
	test	$0x04, %dl
	jnz	L(exit_tail2)
	add	$3, %eax
	RETURN

	.p2align 4
L(exit_8):
	test	$0x10, %dl
	jnz	L(exit_tail4)
	test	$0x20, %dl
	jnz	L(exit_tail5)
	test	$0x40, %dl
	jnz	L(exit_tail6)
	add	$7, %eax
	RETURN

	.p2align 4
L(exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(exit_high_8)
	test	$0x01, %dh
	jnz	L(exit_tail8)
	test	$0x02, %dh
	jnz	L(exit_tail9)
	test	$0x04, %dh
	jnz	L(exit_tail10)
	add	$11, %eax
	RETURN

	.p2align 4
L(exit_high_8):
	test	$0x10, %dh
	jnz	L(exit_tail12)
	test	$0x20, %dh
	jnz	L(exit_tail13)
	test	$0x40, %dh
	jnz	L(exit_tail14)
	add	$15, %eax
L(exit_tail0):
	RETURN

#ifdef USE_AS_STRNLEN

	/* Bounded tail: at most 64 budget bytes remain from %eax.  Adding
	   64 back undoes the subtraction made just before the jump, so
	   %edi is again the number of budget bytes left counting from
	   %eax.  Up to four blocks are checked; the budget drops by 16
	   between blocks and running out means the limit is the result.  */
	.p2align 4
L(len_less64):
	pxor	%xmm0, %xmm0
	add	$64, %edi

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(strnlen_exit)

	sub	$16, %edi
	jbe	L(return_start_len)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(strnlen_exit)

	sub	$16, %edi
	jbe	L(return_start_len)

	pcmpeqb	(%eax), %xmm0
	pmovmskb	%xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(strnlen_exit)

	sub	$16, %edi
	jbe	L(return_start_len)

	pcmpeqb	(%eax), %xmm1
	pmovmskb	%xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(strnlen_exit)

	/* No zero byte within the budget: the result is the limit.  */
#ifndef USE_AS_STRLCAT
	movl	LEN(%esp), %eax
	RETURN
#else
	jmp	L(return_start_len)
#endif

	/* Same decoding as L(exit), but a zero byte at index k of the block
	   only counts when at least k + 1 budget bytes remain from the
	   start of the block (%edi); otherwise the limit is returned.
	   Index 0 needs no check because %edi is at least 1 here.  */
	.p2align 4
L(strnlen_exit):
	sub	%ecx, %eax

	test	%dl, %dl
	jz	L(strnlen_exit_high)
	mov	%dl, %cl
	and	$15, %cl
	jz	L(strnlen_exit_8)
	test	$0x01, %dl
	jnz	L(exit_tail0)
	test	$0x02, %dl
	jnz	L(strnlen_exit_tail1)
	test	$0x04, %dl
	jnz	L(strnlen_exit_tail2)
	sub	$4, %edi
	jb	L(return_start_len)
	lea	3(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_8):
	test	$0x10, %dl
	jnz	L(strnlen_exit_tail4)
	test	$0x20, %dl
	jnz	L(strnlen_exit_tail5)
	test	$0x40, %dl
	jnz	L(strnlen_exit_tail6)
	sub	$8, %edi
	jb	L(return_start_len)
	lea	7(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_high):
	mov	%dh, %ch
	and	$15, %ch
	jz	L(strnlen_exit_high_8)
	test	$0x01, %dh
	jnz	L(strnlen_exit_tail8)
	test	$0x02, %dh
	jnz	L(strnlen_exit_tail9)
	test	$0x04, %dh
	jnz	L(strnlen_exit_tail10)
	sub	$12, %edi
	jb	L(return_start_len)
	lea	11(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_high_8):
	test	$0x10, %dh
	jnz	L(strnlen_exit_tail12)
	test	$0x20, %dh
	jnz	L(strnlen_exit_tail13)
	test	$0x40, %dh
	jnz	L(strnlen_exit_tail14)
	sub	$16, %edi
	jb	L(return_start_len)
	lea	15(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail1):
	sub	$2, %edi
	jb	L(return_start_len)
	lea	1(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail2):
	sub	$3, %edi
	jb	L(return_start_len)
	lea	2(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail4):
	sub	$5, %edi
	jb	L(return_start_len)
	lea	4(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail5):
	sub	$6, %edi
	jb	L(return_start_len)
	lea	5(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail6):
	sub	$7, %edi
	jb	L(return_start_len)
	lea	6(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail8):
	sub	$9, %edi
	jb	L(return_start_len)
	lea	8(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail9):
	sub	$10, %edi
	jb	L(return_start_len)
	lea	9(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail10):
	sub	$11, %edi
	jb	L(return_start_len)
	lea	10(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail12):
	sub	$13, %edi
	jb	L(return_start_len)
	lea	12(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail13):
	sub	$14, %edi
	jb	L(return_start_len)
	lea	13(%eax), %eax
	RETURN

	.p2align 4
L(strnlen_exit_tail14):
	sub	$15, %edi
	jb	L(return_start_len)
	lea	14(%eax), %eax
	RETURN

#ifndef USE_AS_STRLCAT
	/* Budget exhausted before a zero byte: return the limit argument.  */
	.p2align 4
L(return_start_len):
	movl	LEN(%esp), %eax
	RETURN
#endif

/* for prolog only */

	/* Limit of 4 or less.  Adding 4 back makes %edi the limit itself;
	   a limit of 0 returns 0 without reading the string.  After each
	   byte that is not zero, a budget equal to the bytes consumed so
	   far returns that count.  */
	.p2align 4
L(len_less4_prolog):
	xor	%eax, %eax

	add	$4, %edi
	jz	L(exit_tail0)

	cmpb	$0, (%edx)
	jz	L(exit_tail0)
	cmp	$1, %edi
	je	L(exit_tail1)

	cmpb	$0, 1(%edx)
	jz	L(exit_tail1)
	cmp	$2, %edi
	je	L(exit_tail2)

	cmpb	$0, 2(%edx)
	jz	L(exit_tail2)
	cmp	$3, %edi
	je	L(exit_tail3)

	cmpb	$0, 3(%edx)
	jz	L(exit_tail3)
	mov	%edi, %eax
	RETURN

	/* Limit of 5..8: bytes 0..3 are already known to be non-zero and
	   %edi becomes limit - 4.  Same scheme for bytes 4..7.  */
	.p2align 4
L(len_less8_prolog):
	add	$4, %edi

	cmpb	$0, 4(%edx)
	jz	L(exit_tail4)
	cmp	$1, %edi
	je	L(exit_tail5)

	cmpb	$0, 5(%edx)
	jz	L(exit_tail5)
	cmp	$2, %edi
	je	L(exit_tail6)

	cmpb	$0, 6(%edx)
	jz	L(exit_tail6)
	cmp	$3, %edi
	je	L(exit_tail7)

	cmpb	$0, 7(%edx)
	jz	L(exit_tail7)
	mov	$8, %eax
	RETURN


	/* Limit of 9..12: %edi becomes limit - 8; bytes 8..11.  */
	.p2align 4
L(len_less12_prolog):
	add	$4, %edi

	cmpb	$0, 8(%edx)
	jz	L(exit_tail8)
	cmp	$1, %edi
	je	L(exit_tail9)

	cmpb	$0, 9(%edx)
	jz	L(exit_tail9)
	cmp	$2, %edi
	je	L(exit_tail10)

	cmpb	$0, 10(%edx)
	jz	L(exit_tail10)
	cmp	$3, %edi
	je	L(exit_tail11)

	cmpb	$0, 11(%edx)
	jz	L(exit_tail11)
	mov	$12, %eax
	RETURN

	/* Limit of 13..16: %edi becomes limit - 12; bytes 12..15.  */
	.p2align 4
L(len_less16_prolog):
	add	$4, %edi

	cmpb	$0, 12(%edx)
	jz	L(exit_tail12)
	cmp	$1, %edi
	je	L(exit_tail13)

	cmpb	$0, 13(%edx)
	jz	L(exit_tail13)
	cmp	$2, %edi
	je	L(exit_tail14)

	cmpb	$0, 14(%edx)
	jz	L(exit_tail14)
	cmp	$3, %edi
	je	L(exit_tail15)

	cmpb	$0, 15(%edx)
	jz	L(exit_tail15)
	mov	$16, %eax
	RETURN
#endif

	/* Shared exit stubs: add the index N of the zero byte to the base
	   already in %eax (0 for the first 16 bytes, the block offset
	   after L(exit)) and return.  */
	.p2align 4
L(exit_tail1):
	add	$1, %eax
	RETURN

L(exit_tail2):
	add	$2, %eax
	RETURN

L(exit_tail3):
	add	$3, %eax
	RETURN

L(exit_tail4):
	add	$4, %eax
	RETURN

L(exit_tail5):
	add	$5, %eax
	RETURN

L(exit_tail6):
	add	$6, %eax
	RETURN

L(exit_tail7):
	add	$7, %eax
	RETURN

L(exit_tail8):
	add	$8, %eax
	RETURN

L(exit_tail9):
	add	$9, %eax
	RETURN

L(exit_tail10):
	add	$10, %eax
	RETURN

L(exit_tail11):
	add	$11, %eax
	RETURN

L(exit_tail12):
	add	$12, %eax
	RETURN

L(exit_tail13):
	add	$13, %eax
	RETURN

L(exit_tail14):
	add	$14, %eax
	RETURN

L(exit_tail15):
	add	$15, %eax
#ifndef USE_AS_STRCAT
	RETURN
END (STRLEN)
#endif