/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Prologue. Saves the incoming dst pointer (r0) so that it can be handed
    // back as the return value, the registers r4/r5 that the copy code uses
    // as scratch, and the return address.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Epilogue. The inst argument is pop or a conditional form of it
    // (popne, popcs, popeq) for use inside an IT block. Restores the saved
    // dst into r0, restores r4/r5 and returns by loading pc.
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret

    // Test one dst byte for the terminator. Leaves r0 pointing at the
    // terminator and jumps to the copy phase when it is found, otherwise
    // advances r0 by one. Clobbers r3.
    .macro m_scan_byte
    ldrb    r3, [r0]
    cbz     r3, .Lstrcat_r0_scan_done
    add     r0, #1
    .endm // m_scan_byte

    // Copy one byte from [r1] to [r0], post-incrementing both pointers, then
    // apply the compare-and-branch given by cmd (cbz or cbnz) to the byte.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// strcat: append the NUL-terminated string at r1 to the NUL-terminated
// string at r0.
//
// In:       r0 = dst, r1 = src
// Out:      r0 = the dst pointer that was passed in
// Scratch:  r2, r3, ip, lr (lr is reusable because m_push saved it);
//           r4 and r5 are saved by m_push and restored by m_ret
// Stack:    16 bytes (one push of four registers), none when src is empty
//
// Structure:
//   1. .Lstrcat_*  locate the terminator of dst. r0 ends up pointing at it.
//   2. .Lstrcpy_*  copy src, including its terminator, to that location.
//
// Word-at-a-time terminator test used throughout:
//       ip = (w - 0x01010101) & ~w & 0x80808080
// ip is non-zero exactly when w contains a zero byte, and the lowest set
// bit of ip (0x80, 0x8000, 0x800000 or 0x80000000) identifies the first
// zero byte. Bits above the lowest one may be set spuriously, which is why
// every decoder below tests the bytes in ascending order. The decoders
// treat bits 7:0 of a loaded word as the lowest-addressed byte.
ENTRY(strcat)
    // Quick check to see if src is empty. Nothing has been pushed yet, so
    // a plain return leaves r0 (dst) as the result.
    ldrb    r2, [r1]
    pld     [r1, #0]
    cbnz    r2, .Lstrcat_continue
    bx      lr

.Lstrcat_continue:
    // To speed up really small dst strings, unroll checking the first 4 bytes.
    m_push
    m_scan_byte
    m_scan_byte
    m_scan_byte
    m_scan_byte

    // r3 = misalignment of the dst scan pointer within a double word.
    ands    r3, r0, #7
    bne     .Lstrcat_align_src

    // Scan dst eight bytes per iteration; r0 is double word aligned here.
    .p2align 2
.Lstrcat_mainloop:
    ldmia   r0!, {r2, r3}

    pld     [r0, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_second_register
    b       .Lstrcat_mainloop

.Lstrcat_zero_in_first_register:
    // r0 already points past both words; step back over the second one so
    // that the decoder below can be shared.
    sub     r0, r0, #4

.Lstrcat_zero_in_second_register:
    // On entry r0 points just past the word whose mask is in ip. Move r0
    // back onto the zero byte.
    // Check for zero in byte 0.
    tst     ip, #0x80
    it      ne
    subne   r0, r0, #4
    bne     .Lstrcat_r0_scan_done
    // Check for zero in byte 1.
    tst     ip, #0x8000
    it      ne
    subne   r0, r0, #3
    bne     .Lstrcat_r0_scan_done
    // Check for zero in byte 2.
    tst     ip, #0x800000
    it      ne
    subne   r0, r0, #2
    it      eq
    // Zero is in byte 3.
    subeq   r0, r0, #1

.Lstrcat_r0_scan_done:
    // r0 points at the terminator of dst, r1 at the start of src.
    // Unroll the first 8 bytes that will be copied.
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.Lstrcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.Lstrcpy_continue

.Lstrcpy_finish:
    m_ret   inst=pop

.Lstrcpy_continue:
    pld     [r1, #0]
    // r3 = misalignment of the write pointer within a double word.
    ands    r3, r0, #7
    bne     .Lstrcpy_align_dst

.Lstrcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .Lstrcpy_unaligned_copy

    // Both pointers are double word aligned: copy eight bytes per iteration.
    .p2align 2
.Lstrcpy_mainloop:
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_mainloop

.Lstrcpy_zero_in_first_register:
    // r2 holds the final word, ip its zero-byte mask; nothing from r2 has
    // been stored yet. Store up to and including the terminator, then return.
    // The shift by 17 moves mask bit 7 into the result (NE: zero in byte 0)
    // and mask bit 15 into the carry (CS: zero in byte 1).
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Bits 7 and 15 are clear here, so after shifting out bit 31 the result
    // is zero exactly when bit 23 is clear (EQ: zero in byte 3, store the
    // whole word). Otherwise the zero is in byte 2: store three bytes.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_zero_in_second_register:
    // Same decoding as above, but r2 is a full word still to be stored and
    // the terminator is somewhere in r3.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 becomes the number of bytes (1..7) needed to align the write
    // pointer. The shift by 31 puts bit 0 of that count into the result
    // (EQ: no single byte step needed) and bit 1 into the carry (CC: no
    // two byte step needed). The byte copy in between does not alter the
    // flags, so the carry is still valid at .Lstrcpy_align_to_32.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .Lstrcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .Lstrcpy_complete

.Lstrcpy_align_to_32:
    bcc     .Lstrcpy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

.Lstrcpy_align_to_64:
    // Bit 2 of the count: four more bytes are needed to reach alignment.
    tst     r3, #4
    beq     .Lstrcpy_check_src_align
    // Read one byte at a time since we don't know the src alignment
    // and we don't want to read into a different page.
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cbz     r4, .Lstrcpy_complete
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cbz     r5, .Lstrcpy_complete
    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cbz     r4, .Lstrcpy_complete
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cbz     r5, .Lstrcpy_complete
    b       .Lstrcpy_check_src_align

.Lstrcpy_complete:
    m_ret   inst=pop

.Lstrcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 and is non-zero here, so table entry 0 is only a
    // placeholder. A src that is k bytes past a double word boundary has
    // 8 - k bytes left before the next one, hence entry k selects
    // .Lstrcpy_unalign(8 - k). Every variant consumes eight bytes of src
    // per iteration, so that distance is the same at the top of each pass.
    // Each variant is arranged so that no load touches an aligned double
    // word that is not already known to contain part of the string.
    //
    // tbb branches to pc + 2 * table[r3]. In Thumb state pc reads as the
    // address of the tbb instruction plus 4, which is the address of the
    // table that immediately follows it.
    tbb     [pc, r3]
.Lstrcpy_unaligned_branchtable:
    .byte 0
    .byte ((.Lstrcpy_unalign7 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign6 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign5 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign4 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign3 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign2 - .Lstrcpy_unaligned_branchtable)/2)
    .byte ((.Lstrcpy_unalign1 - .Lstrcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.Lstrcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Bytes 4..6 are checked one at a time before the second word load,
    // whose last byte lies beyond the 7 byte window.
    ldrb    r3, [r1]
    cbz     r3, .Lstrcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .Lstrcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .Lstrcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only the top byte of r3 can still be the terminator. Both words are
    // stored either way; EQ means the terminator has just been written.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign7

.Lstrcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.Lstrcpy_unalign_return:
    m_ret   inst=pop

.Lstrcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

.Lstrcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.Lstrcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Bytes 4 and 5 are checked individually before loading the second word.
    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .Lstrcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r3 are known to be non-zero; test byte 2, then byte 3.
    tst     r3, #0xff0000
    beq     .Lstrcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .Lstrcpy_unalign_return
    b       .Lstrcpy_unalign6

.Lstrcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.Lstrcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    // Byte 4 is checked individually before loading the second word.
    ldrb    r4, [r1]
    cbz     r4, .Lstrcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign5

.Lstrcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.Lstrcpy_unalign4:
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.Lstrcpy_unalign3:
    // The first three bytes are checked individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .Lstrcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Bytes 0..2 of r2 are known to be non-zero; only its top byte is tested.
    lsrs    lr, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign3

.Lstrcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.Lstrcpy_unalign2:
    // The first two bytes are checked individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .Lstrcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 0 and 1 of r2 are known to be non-zero; test byte 2, then byte 3.
    tst     r2, #0xff0000
    beq     .Lstrcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .Lstrcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.Lstrcpy_unalign1:
    // The first byte is checked individually before any word load.
    ldrb    r2, [r1]
    cbz     r2, .Lstrcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .Lstrcpy_unalign1

    // Shared tails for the variants above. copy1byte and copy2bytes expect
    // the individually loaded bytes in r2 (and r3); copy3bytes and
    // copy4bytes expect the loaded word in r2.
.Lstrcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.Lstrcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop

.Lstrcat_align_src:
    // Align to a double word (64 bits).
    // Despite the label name, the pointer being aligned is r0, the dst scan
    // pointer. r3 becomes the number of bytes (1..7) needed to align it and
    // is decoded as in .Lstrcpy_align_dst: EQ after the shift means no
    // single byte step, CC means no two byte step. The loads and cbz in
    // between leave the flags untouched.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .Lstrcat_align_to_32
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update

.Lstrcat_align_to_32:
    bcc     .Lstrcat_align_to_64
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update
    ldrb    r2, [r0], #1
    cbz     r2, .Lstrcat_r0_update

.Lstrcat_align_to_64:
    tst     r3, #4
    beq     .Lstrcat_mainloop
    // r0 is word aligned here; scan one word to reach double word alignment.
    ldr     r3, [r0], #4

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .Lstrcat_zero_in_second_register
    b       .Lstrcat_mainloop

.Lstrcat_r0_update:
    // The post-incremented load stepped r0 one past the terminator.
    sub     r0, r0, #1
    b       .Lstrcat_r0_scan_done
END(strcat)