/*
 * Copyright (C) 2013 The Android Open Source Project
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *  * Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  * Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS
 * OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * Copyright (c) 2013 ARM Ltd
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. The name of the company may not be used to endorse or promote
 *    products derived from this software without specific prior written
 *    permission.
 *
 * THIS SOFTWARE IS PROVIDED BY ARM LTD ``AS IS'' AND ANY EXPRESS OR IMPLIED
 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL ARM LTD BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
 * TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
 * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#include <private/bionic_asm.h>

    .syntax unified

    .thumb
    .thumb_func

    // Save the original dst (r0) together with r4, r5 and the return
    // address. Every exit taken after m_push must go through m_ret.
    .macro m_push
    push    {r0, r4, r5, lr}
    .endm // m_push

    // Return to the caller: reloads r0 with the dst pointer saved by m_push,
    // restores r4/r5 and loads the saved lr into pc. "inst" is "pop" or a
    // conditional form (popeq, popne, popcs) when used inside an IT block.
    .macro m_ret inst
    \inst   {r0, r4, r5, pc}
    .endm // m_ret

    // Test one dst byte: leave r0 pointing at it and jump to the copy phase
    // if it is the NUL terminator, otherwise step r0 past it. Clobbers r3.
    .macro m_scan_byte
    ldrb    r3, [r0]
    cbz     r3, .L_strcat_r0_scan_done
    add     r0, #1
    .endm // m_scan_byte

    // Copy one byte from [r1] to [r0] through \reg, post-incrementing both
    // pointers, then apply \cmd (cbz or cbnz) to the copied byte with target
    // \label.
    .macro m_copy_byte reg, cmd, label
    ldrb    \reg, [r1], #1
    strb    \reg, [r0], #1
    \cmd    \reg, \label
    .endm // m_copy_byte

// strcat: append the NUL-terminated string at r1 to the NUL-terminated
// string at r0.
//
//   In:      r0 = dst, r1 = src
//   Out:     r0 = dst as passed in (never modified on the empty-src path,
//            reloaded from the stack by m_ret on every other path)
//   Scratch: r1, r2, r3, ip, flags. lr is used as a temporary after it has
//            been pushed. r4 and r5 are used but saved/restored.
//   Stack:   16 bytes (m_push) on every path except the empty-src return.
//
// The code first finds the end of dst (".L_strcat_*" labels), then copies
// src there (".L_strcpy_*" labels).
//
// Word-at-a-time NUL detection used throughout:
//     ip = (x - 0x01010101) & ~x & 0x80808080
// is non-zero iff x contains a zero byte; bit 7 of the lowest-addressed
// zero byte is guaranteed to be set. The byte-position tests below treat the
// low-order byte of a register as the first byte in memory (little-endian).
//
// All internal branch targets are assembler-local (".L" prefix) so that
// "strcat" is the only symbol this function contributes to the object file.
ENTRY(strcat)
    // Quick check to see if src is empty.
    ldrb    r2, [r1]
    pld     [r1, #0]
    cbnz    r2, .L_strcat_continue
    bx      lr

.L_strcat_continue:
    // To speed up really small dst strings, unroll checking the first 4 bytes.
    m_push
    m_scan_byte
    m_scan_byte
    m_scan_byte
    m_scan_byte

    // r3 = dst & 7; if non-zero, scan byte/word-wise up to a double word
    // boundary before entering the 8-bytes-per-iteration loop.
    ands    r3, r0, #7
    bne     .L_strcat_align_src

    .p2align 2
.L_strcat_mainloop:
    ldmia   r0!, {r2, r3}

    pld     [r0, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_second_register
    b       .L_strcat_mainloop

.L_strcat_zero_in_first_register:
    // r0 is 8 past the start of the first word; pretend only that word
    // was consumed so the shared code below applies.
    sub     r0, r0, #4

.L_strcat_zero_in_second_register:
    // On entry r0 points just past the word whose zero mask is in ip.
    // Rewind r0 so that it points at the NUL byte itself.
    // Check for zero in byte 0.
    tst     ip, #0x80
    it      ne
    subne   r0, r0, #4
    bne     .L_strcat_r0_scan_done
    // Check for zero in byte 1.
    tst     ip, #0x8000
    it      ne
    subne   r0, r0, #3
    bne     .L_strcat_r0_scan_done
    // Check for zero in byte 2.
    tst     ip, #0x800000
    it      ne
    subne   r0, r0, #2
    it      eq
    // Zero is in byte 3.
    subeq   r0, r0, #1

.L_strcat_r0_scan_done:
    // r0 now points at dst's terminating NUL; src is copied over it.
    // Unroll the first 8 bytes that will be copied.
    m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r5, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r2, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r3, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r4, cmd=cbz, label=.L_strcpy_finish
    m_copy_byte reg=r5, cmd=cbnz, label=.L_strcpy_continue

.L_strcpy_finish:
    m_ret   inst=pop

.L_strcpy_continue:
    pld     [r1, #0]
    // r3 = dst & 7; align dst to a double word first if necessary.
    ands    r3, r0, #7
    bne     .L_strcpy_align_dst

.L_strcpy_check_src_align:
    // At this point dst is aligned to a double word, check if src
    // is also aligned to a double word.
    ands    r3, r1, #7
    bne     .L_strcpy_unaligned_copy

    .p2align 2
.L_strcpy_mainloop:
    // Both pointers double word aligned: move 8 bytes per iteration,
    // checking each word for a NUL before it is stored.
    ldmia   r1!, {r2, r3}

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_mainloop

.L_strcpy_zero_in_first_register:
    // r2 holds the final word, ip its zero mask. Store only the bytes up
    // to and including the NUL, then return.
    // lsls #17: Z clear <=> mask bit 7 set (NUL in byte 0),
    //           C set   <=> mask bit 15 set (NUL in byte 1).
    lsls    lr, ip, #17
    itt     ne
    strbne  r2, [r0]
    m_ret   inst=popne
    itt     cs
    strhcs  r2, [r0]
    m_ret   inst=popcs
    // Bits 7 and 15 are clear here, so after lsls #1 the result is zero
    // only when bit 23 is clear too, i.e. the NUL is in byte 3.
    lsls    ip, ip, #1
    itt     eq
    streq   r2, [r0]
    m_ret   inst=popeq
    // NUL is in byte 2: store a halfword followed by one byte.
    strh    r2, [r0], #2
    lsr     r3, r2, #16
    strb    r3, [r0]
    m_ret   inst=pop

.L_strcpy_zero_in_second_register:
    // r2 is a full word without NUL, r3 holds the final word and ip its
    // zero mask. Same flag scheme as .L_strcpy_zero_in_first_register.
    lsls    lr, ip, #17
    ittt    ne
    stmiane r0!, {r2}
    strbne  r3, [r0]
    m_ret   inst=popne
    ittt    cs
    strcs   r2, [r0], #4
    strhcs  r3, [r0]
    m_ret   inst=popcs
    lsls    ip, ip, #1
    itt     eq
    stmiaeq r0, {r2, r3}
    m_ret   inst=popeq
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r4, r3, #16
    strb    r4, [r0]
    m_ret   inst=pop

.L_strcpy_align_dst:
    // Align to a double word (64 bits).
    // r3 = 8 - (dst & 7) = number of bytes to copy to reach alignment.
    // lsls #31 puts bit 0 of r3 into Z (inverted) and bit 1 into C; the
    // ldrb/strb/cbz sequence below does not alter flags, so C is still
    // valid at .L_strcpy_align_to_32.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .L_strcpy_align_to_32

    ldrb    r2, [r1], #1
    strb    r2, [r0], #1
    cbz     r2, .L_strcpy_complete

.L_strcpy_align_to_32:
    bcc     .L_strcpy_align_to_64

    ldrb    r4, [r1], #1
    strb    r4, [r0], #1
    cmp     r4, #0
    it      eq
    m_ret   inst=popeq
    ldrb    r5, [r1], #1
    strb    r5, [r0], #1
    cmp     r5, #0
    it      eq
    m_ret   inst=popeq

.L_strcpy_align_to_64:
    // Bit 2 of r3 set means one more word is needed to reach alignment.
    tst     r3, #4
    beq     .L_strcpy_check_src_align
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register
    stmia   r0!, {r2}
    b       .L_strcpy_check_src_align

.L_strcpy_complete:
    m_ret   inst=pop

.L_strcpy_unaligned_copy:
    // Dst is aligned to a double word, while src is at an unknown alignment.
    // There are 7 different versions of the unaligned copy code
    // to prevent overreading the src. The mainloop of every single version
    // will store 64 bits per loop. The difference is how much of src can
    // be read without potentially crossing a page boundary.
    //
    // r3 = src & 7 (1..7 here; the aligned case never reaches this point,
    // so table entry 0 is unused). The table immediately follows the tbb so
    // that pc addresses it; each entry is a halfword count, hence the "/2".
    // Index k selects .L_strcpy_unalign(8 - k).
    tbb     [pc, r3]
.L_strcpy_unaligned_branchtable:
    .byte 0
    .byte ((.L_strcpy_unalign7 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign6 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign5 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign4 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign3 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign2 - .L_strcpy_unaligned_branchtable)/2)
    .byte ((.L_strcpy_unalign1 - .L_strcpy_unaligned_branchtable)/2)

    .p2align 2
    // Can read 7 bytes before possibly crossing a page.
.L_strcpy_unalign7:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    // Bytes 4..6 are tested one at a time before the second word is loaded.
    ldrb    r3, [r1]
    cbz     r3, .L_strcpy_unalign7_copy5bytes
    ldrb    r4, [r1, #1]
    cbz     r4, .L_strcpy_unalign7_copy6bytes
    ldrb    r5, [r1, #2]
    cbz     r5, .L_strcpy_unalign7_copy7bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Only byte 7 (top byte of r3) is still unchecked. The stmia does not
    // alter flags, so the beq tests the lsrs result.
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .L_strcpy_unalign_return
    b       .L_strcpy_unalign7

.L_strcpy_unalign7_copy5bytes:
    stmia   r0!, {r2}
    strb    r3, [r0]
.L_strcpy_unalign_return:
    m_ret   inst=pop

.L_strcpy_unalign7_copy6bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    m_ret   inst=pop

.L_strcpy_unalign7_copy7bytes:
    stmia   r0!, {r2}
    strb    r3, [r0], #1
    strb    r4, [r0], #1
    strb    r5, [r0], #1
    m_ret   inst=pop

    .p2align 2
    // Can read 6 bytes before possibly crossing a page.
.L_strcpy_unalign6:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    // Bytes 4 and 5 are tested one at a time before the second word load.
    ldrb    r4, [r1]
    cbz     r4, .L_strcpy_unalign_copy5bytes
    ldrb    r5, [r1, #1]
    cbz     r5, .L_strcpy_unalign_copy6bytes

    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Byte 6 is bits 16..23 of r3, byte 7 is bits 24..31.
    tst     r3, #0xff0000
    beq     .L_strcpy_unalign6_copy7bytes
    lsrs    ip, r3, #24
    stmia   r0!, {r2, r3}
    beq     .L_strcpy_unalign_return
    b       .L_strcpy_unalign6

.L_strcpy_unalign6_copy7bytes:
    stmia   r0!, {r2}
    strh    r3, [r0], #2
    lsr     r3, #16
    strb    r3, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 5 bytes before possibly crossing a page.
.L_strcpy_unalign5:
    ldr     r2, [r1], #4

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    // Byte 4 is tested on its own before the second word is loaded.
    ldrb    r4, [r1]
    cbz     r4, .L_strcpy_unalign_copy5bytes

    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_unalign5

.L_strcpy_unalign_copy5bytes:
    stmia   r0!, {r2}
    strb    r4, [r0]
    m_ret   inst=pop

.L_strcpy_unalign_copy6bytes:
    stmia   r0!, {r2}
    strb    r4, [r0], #1
    strb    r5, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 4 bytes before possibly crossing a page.
.L_strcpy_unalign4:
    ldmia   r1!, {r2}

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    ldmia   r1!, {r3}
    pld     [r1, #64]

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_unalign4

    .p2align 2
    // Can read 3 bytes before possibly crossing a page.
.L_strcpy_unalign3:
    // Bytes 0..2 are tested one at a time before any word load.
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign3_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .L_strcpy_unalign3_copy2bytes
    ldrb    r4, [r1, #2]
    cbz     r4, .L_strcpy_unalign3_copy3bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    // Only byte 3 of the first word is still unchecked.
    lsrs    lr, r2, #24
    beq     .L_strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_unalign3

.L_strcpy_unalign3_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.L_strcpy_unalign3_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.L_strcpy_unalign3_copy3bytes:
    strb    r2, [r0], #1
    strb    r3, [r0], #1
    strb    r4, [r0]
    m_ret   inst=pop

    .p2align 2
    // Can read 2 bytes before possibly crossing a page.
.L_strcpy_unalign2:
    // Bytes 0 and 1 are tested one at a time before any word load.
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign_copy1byte
    ldrb    r3, [r1, #1]
    cbz     r3, .L_strcpy_unalign_copy2bytes

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4
    pld     [r1, #64]

    // Bytes 2 and 3 of the first word are still unchecked.
    tst     r2, #0xff0000
    beq     .L_strcpy_unalign_copy3bytes
    lsrs    ip, r2, #24
    beq     .L_strcpy_unalign_copy4bytes

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_unalign2

    .p2align 2
    // Can read 1 byte before possibly crossing a page.
.L_strcpy_unalign1:
    // Byte 0 is tested on its own before any word load.
    ldrb    r2, [r1]
    cbz     r2, .L_strcpy_unalign_copy1byte

    ldr     r2, [r1], #4
    ldr     r3, [r1], #4

    pld     [r1, #64]

    sub     ip, r2, #0x01010101
    bic     ip, ip, r2
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_first_register

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcpy_zero_in_second_register

    stmia   r0!, {r2, r3}
    b       .L_strcpy_unalign1

.L_strcpy_unalign_copy1byte:
    strb    r2, [r0]
    m_ret   inst=pop

.L_strcpy_unalign_copy2bytes:
    strb    r2, [r0], #1
    strb    r3, [r0]
    m_ret   inst=pop

.L_strcpy_unalign_copy3bytes:
    strh    r2, [r0], #2
    lsr     r2, #16
    strb    r2, [r0]
    m_ret   inst=pop

.L_strcpy_unalign_copy4bytes:
    stmia   r0, {r2}
    m_ret   inst=pop

.L_strcat_align_src:
    // Align to a double word (64 bits).
    // Despite the label name, the pointer being aligned here is r0 (dst),
    // which is still being scanned for its terminating NUL.
    // r3 = 8 - (dst & 7); lsls #31 puts bit 0 of r3 into Z (inverted) and
    // bit 1 into C. ldrb/cbz leave the flags untouched for the bcc below.
    rsb     r3, r3, #8
    lsls    ip, r3, #31
    beq     .L_strcat_align_to_32
    ldrb    r2, [r0], #1
    cbz     r2, .L_strcat_r0_update

.L_strcat_align_to_32:
    bcc     .L_strcat_align_to_64
    ldrb    r2, [r0], #1
    cbz     r2, .L_strcat_r0_update
    ldrb    r2, [r0], #1
    cbz     r2, .L_strcat_r0_update

.L_strcat_align_to_64:
    // Bit 2 of r3 set means one more word is needed to reach alignment.
    tst     r3, #4
    beq     .L_strcat_mainloop
    ldr     r3, [r0], #4

    sub     ip, r3, #0x01010101
    bic     ip, ip, r3
    ands    ip, ip, #0x80808080
    bne     .L_strcat_zero_in_second_register
    b       .L_strcat_mainloop

.L_strcat_r0_update:
    // The post-incremented ldrb stepped r0 one past the NUL; back up onto it.
    sub     r0, r0, #1
    b       .L_strcat_r0_scan_done
END(strcat)