/*
Copyright (c) 2011, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * Syntax/target: GNU as, AT&T operand order, 32-bit x86, run through the
 * C preprocessor.  Arguments are read from the stack (see PARMS/STR1/STR2).
 *
 * Every helper macro below is wrapped in #ifndef so that a file including
 * this one can supply its own definition first.
 */

/* Build an assembler-local label name (.L prefix). */
#ifndef L
# define L(label)	.L##label
#endif

#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

#ifndef cfi_remember_state
# define cfi_remember_state	.cfi_remember_state
#endif

#ifndef cfi_restore_state
# define cfi_restore_state	.cfi_restore_state
#endif

/* Open a global, 16-byte aligned function symbol and its CFI region. */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name, @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* Close the CFI region and record the size of the symbol.  The argument
   must be the same name that was handed to ENTRY.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name, .-name
#endif

/* CFI bookkeeping for a 4-byte push / pop of REG. */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop REG together with the matching CFI annotations. */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)

/* Exported symbol name; an including file may define STRCAT beforehand. */
#ifndef STRCAT
# define STRCAT	strcat_ssse3
#endif

/*
 * Stack offsets of the arguments, valid once %edi has been pushed:
 *   0(%esp) saved %edi, 4(%esp) return address,
 *   STR1 = 8(%esp) first argument, STR2 = 12(%esp) second argument.
 * LEN (strncat build only) is used after %ebx has been pushed as well,
 * hence the extra 4 bytes folded into its offset.
 */
#define PARMS	4
#define STR1	PARMS+4
#define STR2	STR1+4

#ifdef USE_AS_STRNCAT
# define LEN	STR2+8
#endif

#define USE_AS_STRCAT

/*
 * STRCAT (default symbol: strcat_ssse3)
 *
 * In:    STR1 = destination pointer, STR2 = source pointer,
 *        LEN  = count (read only when USE_AS_STRNCAT is defined).
 * Out:   %eax = the original STR1 pointer (kept in %edi throughout).
 * Saved: %edi always; %ebx additionally in the USE_AS_STRNCAT build.
 *
 * Register roles in the code that is visible in this file:
 *   %edi  STR1, later copied to %eax as the return value
 *   %ecx  read cursor (source)
 *   %edx  write cursor (destination + %eax after the strlen include)
 *   %ebx  remaining count (USE_AS_STRNCAT only); %bh doubles as a zero
 *         byte on paths where %ebx is known to be below 256
 *   %esi  popped by several stubs below; it is not pushed anywhere in this
 *         file, so the push presumably lives in ssse3-strcpy-atom.S
 *         (TODO confirm against that file)
 *
 * The function body is completed by two textual includes whose contents
 * are not visible here: sse2-strlen-atom.S and ssse3-strcpy-atom.S.
 */
	.section .text.ssse3,"ax",@progbits
ENTRY (STRCAT)
	PUSH	(%edi)
	mov	STR1(%esp), %edi
	mov	%edi, %edx

/* The included strlen body leaves through RETURN, which is redirected to
   L(StrcpyAtom) so that control stays inside this function.  */
#define RETURN	jmp L(StrcpyAtom)
#include "sse2-strlen-atom.S"

L(StrcpyAtom):
	mov	STR2(%esp), %ecx
	/* %edx = %edi + %eax.  %eax is whatever the included strlen code left
	   behind; presumably the length of the destination string, which
	   would make %edx point at its terminator (TODO confirm).  */
	lea	(%edi, %eax), %edx
#ifdef USE_AS_STRNCAT
	PUSH	(%ebx)
	mov	LEN(%esp), %ebx
	test	%ebx, %ebx
	jz	L(StrncatExit0)
	cmp	$8, %ebx
	jbe	L(StrncpyExit8Bytes)
#endif
	/* Probe the first 16 source bytes one at a time; a zero at offset k
	   is handled by L(Exit<k+1>), which copies k+1 bytes.  */
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmpb	$0, 7(%ecx)
	jz	L(Exit8)
	cmpb	$0, 8(%ecx)
	jz	L(Exit9)
#ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	jb	L(StrncpyExit15Bytes)
#endif
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmpb	$0, 14(%ecx)
	jz	L(Exit15)
	cmpb	$0, 15(%ecx)
	jz	L(Exit16)
#ifdef USE_AS_STRNCAT
	cmp	$16, %ebx
	je	L(StrncatExit16)

/* RETURN1 restores the saved registers and returns.  The CFI_PUSH
   annotations after the ret re-establish the unwind state for the code
   that follows the macro in the instruction stream.  */
# define RETURN1	POP (%ebx); POP (%edi); ret; \
	CFI_PUSH (%ebx); CFI_PUSH (%edi)
# define USE_AS_STRNCPY
#else
# define RETURN1	POP(%edi); ret; CFI_PUSH(%edi)
#endif
#include "ssse3-strcpy-atom.S"

/*
 * Tail dispatch on a 16-bit mask in %ax: bit k selects L(Exit<k+1>), the
 * low byte covering offsets 0-7 and the high byte offsets 8-15.  Not
 * referenced from this file; presumably entered from ssse3-strcpy-atom.S
 * with %esi holding the offset of the current 16-byte chunk (TODO confirm).
 */
	.p2align 4
L(CopyFrom1To16Bytes):
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)
	test	%al, %al
	jz	L(ExitHigh)
	test	$0x01, %al
	jnz	L(Exit1)
	test	$0x02, %al
	jnz	L(Exit2)
	test	$0x04, %al
	jnz	L(Exit3)
	test	$0x08, %al
	jnz	L(Exit4)
	test	$0x10, %al
	jnz	L(Exit5)
	test	$0x20, %al
	jnz	L(Exit6)
	test	$0x40, %al
	jnz	L(Exit7)
	/* %al is non-zero and bits 0-6 are clear, so bit 7 is set:
	   copy 8 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHigh):
	test	$0x01, %ah
	jnz	L(Exit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	test	$0x08, %ah
	jnz	L(Exit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	/* None of bits 0-6 of %ah matched: copy all 16 bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

/*
 * Fixed-size copy stubs.
 *
 * L(ExitN) copies exactly N bytes from (%ecx) to (%edx), using overlapping
 * loads/stores for sizes that are not a power of two, then returns STR1.
 *
 * L(StrncatExitN) first stores %bh at N(%edx) and then falls through into
 * L(ExitN).  Every jump to L(StrncatExitN) in this file is guarded by a
 * compare that establishes %ebx == N, so %bh is zero and the store writes
 * the terminating NUL just past the N copied bytes.
 */
	.p2align 4
L(StrncatExit1):
	movb	%bh, 1(%edx)
L(Exit1):
	movb	(%ecx), %al
	movb	%al, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit2):
	movb	%bh, 2(%edx)
L(Exit2):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit3):
	movb	%bh, 3(%edx)
L(Exit3):
	movw	(%ecx), %ax
	movw	%ax, (%edx)
	movb	2(%ecx), %al
	movb	%al, 2(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit4):
	movb	%bh, 4(%edx)
L(Exit4):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit5):
	movb	%bh, 5(%edx)
L(Exit5):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movb	4(%ecx), %al
	movb	%al, 4(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit6):
	movb	%bh, 6(%edx)
L(Exit6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit7):
	movb	%bh, 7(%edx)
L(Exit7):
	/* Bytes 0-3 and 3-6: two overlapping 4-byte moves.  */
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit8):
	movb	%bh, 8(%edx)
L(Exit8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit9):
	movb	%bh, 9(%edx)
L(Exit9):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	8(%ecx), %al
	movb	%al, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit10):
	movb	%bh, 10(%edx)
L(Exit10):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movw	8(%ecx), %ax
	movw	%ax, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit11):
	movb	%bh, 11(%edx)
L(Exit11):
	/* Bytes 0-7, then 7-10 (overlapping by one byte).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	7(%ecx), %eax
	movl	%eax, 7(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit12):
	movb	%bh, 12(%edx)
L(Exit12):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movl	8(%ecx), %eax
	movl	%eax, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit13):
	movb	%bh, 13(%edx)
L(Exit13):
	/* Bytes 0-7, then 5-12 (overlapping).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	5(%ecx), %xmm0
	movlpd	%xmm0, 5(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit14):
	movb	%bh, 14(%edx)
L(Exit14):
	/* Bytes 0-7, then 6-13 (overlapping).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	6(%ecx), %xmm0
	movlpd	%xmm0, 6(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit15):
	movb	%bh, 15(%edx)
L(Exit15):
	/* Bytes 0-7, then 7-14 (overlapping).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(StrncatExit16):
	movb	%bh, 16(%edx)
L(Exit16):
	movlpd	(%ecx), %xmm0
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

#ifdef USE_AS_STRNCPY

	/* The code below pops %esi; annotate it as pushed for the unwinder. */
	CFI_PUSH(%esi)

/*
 * Case 2: like L(CopyFrom1To16Bytes), but each mask bit test is interleaved
 * with a check of the remaining count in %ebx (after adding 16 back).
 * Not referenced directly from this file except via
 * L(CopyFrom1To16BytesCase2OrCase3); presumably entered from
 * ssse3-strcpy-atom.S (TODO confirm).
 */
	.p2align 4
L(CopyFrom1To16BytesCase2):
	add	$16, %ebx
	add	%esi, %ecx
	lea	(%esi, %edx), %esi
	/* %dh = (bit 15 of %ebx - 9, moved to bit 7) | %al.  The or sets ZF
	   only when %al is zero and that bit is clear.  The lea and the pop
	   that follow do not modify flags, so the jz still sees that ZF.  */
	lea	-9(%ebx), %edx
	and	$1<<7, %dh
	or	%al, %dh
	lea	(%esi), %edx
	POP	(%esi)
	jz	L(ExitHighCase2)

	test	$0x01, %al
	jnz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	test	$0x02, %al
	jnz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	test	$0x04, %al
	jnz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	test	$0x08, %al
	jnz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	test	$0x10, %al
	jnz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	test	$0x20, %al
	jnz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	test	$0x40, %al
	jnz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	/* %eax = %edx + 7.  cmpb $1 sets CF exactly when the byte there is
	   zero, and sbb $-1 computes %eax + 1 - CF, so %eax advances by one
	   only when that byte is non-zero.  A zero byte is then stored at
	   the resulting address.  */
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	xor	%cl, %cl
	movb	%cl, (%eax)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHighCase2):
	test	$0x01, %ah
	jnz	L(Exit9)
	cmp	$9, %ebx
	je	L(StrncatExit9)
	test	$0x02, %ah
	jnz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	test	$0x04, %ah
	jnz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	test	$0x8, %ah
	jnz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	test	$0x10, %ah
	jnz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	test	$0x20, %ah
	jnz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	test	$0x40, %ah
	jnz	L(Exit15)
	cmp	$15, %ebx
	je	L(StrncatExit15)
	/* No explicit terminator store on this path: 16 bytes are copied
	   as they are.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm1, 8(%edx)
	movl	%edi, %eax
	RETURN1

	/* The code below pops %esi; annotate it as pushed for the unwinder. */
	CFI_PUSH(%esi)

/* Non-zero %eax selects Case 2; zero falls through into Case 3. */
L(CopyFrom1To16BytesCase2OrCase3):
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)

/*
 * Case 3: dispatch purely on the remaining count in %ebx (after adding 16
 * back); no mask bits are consulted on this path.
 */
	.p2align 4
L(CopyFrom1To16BytesCase3):
	add	$16, %ebx
	add	%esi, %edx
	add	%esi, %ecx

	POP	(%esi)

	cmp	$8, %ebx
	ja	L(ExitHighCase3)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	/* %ebx <= 8 here (the ja above was not taken), so %bh is zero and
	   serves as the terminator written after the 8 copied bytes.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movb	%bh, 8(%edx)
	movl	%edi, %eax
	RETURN1

	.p2align 4
L(ExitHighCase3):
	cmp	$9, %ebx
	je	L(StrncatExit9)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	cmp	$15, %ebx
	je	L(StrncatExit15)
	/* Copy 16 bytes and store %bh after them.  This relies on %bh being
	   zero, i.e. %ebx below 256 here; presumably %ebx is exactly 16 on
	   this path (TODO confirm against the caller in the strcpy
	   include).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	8(%ecx), %xmm1
	movlpd	%xmm1, 8(%edx)
	movb	%bh, 16(%edx)
	movl	%edi, %eax
	RETURN1

/* LEN == 0: nothing is copied, return STR1 unchanged. */
	.p2align 4
L(StrncatExit0):
	movl	%edi, %eax
	RETURN1

/*
 * Reached from the entry sequence when source bytes 0-8 are all non-zero
 * and LEN is below 16 (jb after cmp $16).  Checks offsets 9-13 against
 * both the terminator and the count, then handles the remainder.
 */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$9, %ebx
	je	L(StrncatExit9)
	cmpb	$0, 9(%ecx)
	jz	L(Exit10)
	cmp	$10, %ebx
	je	L(StrncatExit10)
	cmpb	$0, 10(%ecx)
	jz	L(Exit11)
	cmp	$11, %ebx
	je	L(StrncatExit11)
	cmpb	$0, 11(%ecx)
	jz	L(Exit12)
	cmp	$12, %ebx
	je	L(StrncatExit12)
	cmpb	$0, 12(%ecx)
	jz	L(Exit13)
	cmp	$13, %ebx
	je	L(StrncatExit13)
	cmpb	$0, 13(%ecx)
	jz	L(Exit14)
	cmp	$14, %ebx
	je	L(StrncatExit14)
	/* Copy 15 bytes (0-7 and 7-14, overlapping).  Then store %bh (zero,
	   since %ebx < 16 on this path) at offset 14 when the byte already
	   there is zero, otherwise at offset 15: cmpb $1 sets CF only for a
	   zero byte and sbb $-1 yields %eax + 1 - CF.  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	movlpd	7(%ecx), %xmm0
	movlpd	%xmm0, 7(%edx)
	lea	14(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	movb	%bh, (%eax)
	movl	%edi, %eax
	RETURN1

/*
 * Reached from the entry sequence when 1 <= LEN <= 8 (jbe after cmp $8,
 * with LEN == 0 already diverted to L(StrncatExit0)).
 */
	.p2align 4
L(StrncpyExit8Bytes):
	cmpb	$0, (%ecx)
	jz	L(Exit1)
	cmp	$1, %ebx
	je	L(StrncatExit1)
	cmpb	$0, 1(%ecx)
	jz	L(Exit2)
	cmp	$2, %ebx
	je	L(StrncatExit2)
	cmpb	$0, 2(%ecx)
	jz	L(Exit3)
	cmp	$3, %ebx
	je	L(StrncatExit3)
	cmpb	$0, 3(%ecx)
	jz	L(Exit4)
	cmp	$4, %ebx
	je	L(StrncatExit4)
	cmpb	$0, 4(%ecx)
	jz	L(Exit5)
	cmp	$5, %ebx
	je	L(StrncatExit5)
	cmpb	$0, 5(%ecx)
	jz	L(Exit6)
	cmp	$6, %ebx
	je	L(StrncatExit6)
	cmpb	$0, 6(%ecx)
	jz	L(Exit7)
	cmp	$7, %ebx
	je	L(StrncatExit7)
	/* Copy 8 bytes, then store %bh (zero, since %ebx <= 8) at offset 7
	   when the byte already there is zero, otherwise at offset 8; same
	   cmpb/sbb idiom as in L(StrncpyExit15Bytes).  */
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	lea	7(%edx), %eax
	cmpb	$1, (%eax)
	sbb	$-1, %eax
	movb	%bh, (%eax)
	movl	%edi, %eax
	RETURN1

#endif
/* Must name the same macro as ENTRY so that .size refers to the symbol that
   was actually defined, whatever STRCAT expands to.  */
END (STRCAT)