/*
 * Copyright (c) 2017 Imagination Technologies.
 *
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *      * Redistributions of source code must retain the above copyright
 *        notice, this list of conditions and the following disclaimer.
 *      * Redistributions in binary form must reproduce the above copyright
 *        notice, this list of conditions and the following disclaimer
 *        in the documentation and/or other materials provided with
 *        the distribution.
 *      * Neither the name of Imagination Technologies nor the names of its
 *        contributors may be used to endorse or promote products derived
 *        from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

#if __mips64
# define NSIZE 8
# define LW ld
# define LWR ldr
# define LWL ldl
# define EXT dext
# define SRL dsrl
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define LWR lwr
# define LWL lwl
# define EXT ext
# define SRL srl
# define SUBU subu
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NULL string terminator but unless ENABLE_READAHEAD is set, we will
   not read the next word after the end of string.  Setting ENABLE_READAHEAD
   will improve performance but is technically illegal based on the
   definition of strcmp.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1) && (!__mips64)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU addiu
# else
#  define PTR_ADDIU daddiu
# endif
#endif
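
/* Overview of the comparison strategy used below: the strings are compared
   byte by byte until a1 is word aligned, then word by word, and any tail is
   finished byte by byte.  The word loops detect an embedded NUL with the
   constants built into t8 and t9 (0x0101...01 and 0x7f7f...7f):
   (v - t8) & ~v & ~t9 is non-zero exactly when some byte of v is zero.
   A rough C sketch of one aligned word step, for illustration only
   (64-bit constants shown; the 32-bit build uses 0x01010101/0x80808080,
   and w0/w1 are illustrative names):

       unsigned long w0 = *(const unsigned long *) a0;
       unsigned long w1 = *(const unsigned long *) a1;
       if (w0 != w1)
           goto worddiff;   [find the first differing or NUL byte]
       if ((w0 - 0x0101010101010101UL) & ~w0 & 0x8080808080808080UL)
           return 0;        [words matched and contained a NUL]
 */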
/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
#define BYTECMP01(OFFSET) \
        lbu     v0, OFFSET(a0); \
        lbu     v1, OFFSET(a1); \
        beq     v0, zero, L(bexit01); \
        nop; \
        bne     v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
        lbu     t8, OFFSET(a0); \
        lbu     t9, OFFSET(a1); \
        beq     t8, zero, L(bexit89); \
        nop; \
        bne     t8, t9, L(bexit89)

/* Allow the routine to be named something else if desired.  */
#ifndef STRNCMP_NAME
# define STRNCMP_NAME strncmp
#endif

#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
        .set    nomips16
        .set    noreorder

        srl     t0, a2, (2 + NSIZE / 4)
        beqz    t0, L(byteloop)         # process by bytes if count is less than (2 * NSIZE)
        andi    t1, a1, (NSIZE - 1)
        beqz    t1, L(exitalign)
        or      t0, zero, NSIZE
        SUBU    t1, t0, t1              # process at most (NSIZE - 1) bytes
        SUBU    a2, a2, t1              # decrement count by t1

L(alignloop):                           # compare byte by byte until a1 is aligned
        BYTECMP01(0)
        SUBU    t1, t1, 0x1
        PTR_ADDIU a0, a0, 0x1
        bne     t1, zero, L(alignloop)
        PTR_ADDIU a1, a1, 0x1

L(exitalign):

/* string a1 is NSIZE-byte aligned at this point. */
#ifndef __mips1
        lui     t8, 0x0101
        ori     t8, 0x0101
        lui     t9, 0x7f7f
        ori     t9, 0x7f7f
#if __mips64
        dsll    t0, t8, 32
        or      t8, t0
        dsll    t1, t9, 32
        or      t9, t1
#endif
#endif

/* Unaligned access is not supported (in hardware or software) for mips1;
   rev6 archs have hardware unaligned support; the remaining archs need to
   use the unaligned load instructions.  */

#if __mips1
        andi    t0, a0, (NSIZE - 1)
        bne     t0, zero, L(byteloop)
#elif __mips_isa_rev < 6
        andi    t0, a0, (NSIZE - 1)
        bne     t0, zero, L(uwordloop)
#endif

#define STRCMPW(OFFSET) \
        LW      v0, (OFFSET)(a0); \
        LW      v1, (OFFSET)(a1); \
        SUBU    t0, v0, t8; \
        bne     v0, v1, L(worddiff); \
        nor     t1, v0, t9; \
        and     t0, t0, t1; \
        bne     t0, zero, L(returnzero);\

L(wordloop):
        SUBU    t1, a2, (8 * NSIZE)
        bltz    t1, L(onewords)
        STRCMPW(0 * NSIZE)
        DELAY_READ
        STRCMPW(1 * NSIZE)
        DELAY_READ
        STRCMPW(2 * NSIZE)
        DELAY_READ
        STRCMPW(3 * NSIZE)
        DELAY_READ
        STRCMPW(4 * NSIZE)
        DELAY_READ
        STRCMPW(5 * NSIZE)
        DELAY_READ
        STRCMPW(6 * NSIZE)
        DELAY_READ
        STRCMPW(7 * NSIZE)
        SUBU    a2, a2, (8 * NSIZE)
        PTR_ADDIU a0, a0, (8 * NSIZE)
        b       L(wordloop)
        PTR_ADDIU a1, a1, (8 * NSIZE)

L(onewords):
        SUBU    t1, a2, NSIZE
        bltz    t1, L(byteloop)
        STRCMPW(0)
        SUBU    a2, a2, NSIZE
        PTR_ADDIU a0, a0, NSIZE
        b       L(onewords)
        PTR_ADDIU a1, a1, NSIZE

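/* The loops below handle the case where a0 is not word aligned (a1 was
   aligned in the prologue).  They perform the same zero-byte test as
   STRCMPW, but assemble the word from a0 with the unaligned LWR/LWL
   (LDR/LDL on mips64) pair.  This path is only built for pre-R6 ISAs:
   R6 removed those instructions and has hardware unaligned support,
   and mips1 falls back to the byte loop instead.  */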
#if __mips_isa_rev < 6 && !__mips1
#define USTRCMPW(OFFSET) \
        LWR     v0, (OFFSET)(a0); \
        LWL     v0, (OFFSET + NSIZE - 1)(a0); \
        LW      v1, (OFFSET)(a1); \
        SUBU    t0, v0, t8; \
        bne     v0, v1, L(worddiff); \
        nor     t1, v0, t9; \
        and     t0, t0, t1; \
        bne     t0, zero, L(returnzero);\

L(uwordloop):
        SUBU    t1, a2, (8 * NSIZE)
        bltz    t1, L(uonewords)
        USTRCMPW(0 * NSIZE)
        DELAY_READ
        USTRCMPW(1 * NSIZE)
        DELAY_READ
        USTRCMPW(2 * NSIZE)
        DELAY_READ
        USTRCMPW(3 * NSIZE)
        DELAY_READ
        USTRCMPW(4 * NSIZE)
        DELAY_READ
        USTRCMPW(5 * NSIZE)
        DELAY_READ
        USTRCMPW(6 * NSIZE)
        DELAY_READ
        USTRCMPW(7 * NSIZE)
        SUBU    a2, a2, (8 * NSIZE)
        PTR_ADDIU a0, a0, (8 * NSIZE)
        b       L(uwordloop)
        PTR_ADDIU a1, a1, (8 * NSIZE)

L(uonewords):
        SUBU    t1, a2, NSIZE
        bltz    t1, L(byteloop)
        USTRCMPW(0)
        SUBU    a2, a2, NSIZE
        PTR_ADDIU a0, a0, NSIZE
        b       L(uonewords)
        PTR_ADDIU a1, a1, NSIZE

#endif

L(returnzero):
        j       ra
        move    v0, zero

#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
        EXT     t0, v0, POS, 8; \
        beq     t0, zero, L(wexit01); \
        EXT     t1, v1, POS, 8; \
        bne     t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
        EXT     t8, v0, POS, 8; \
        beq     t8, zero, L(wexit89); \
        EXT     t9, v1, POS, 8; \
        bne     t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
        SRL     t0, v0, POS; \
        SRL     t1, v1, POS; \
        andi    t0, t0, 0xff; \
        beq     t0, zero, L(wexit01); \
        andi    t1, t1, 0xff; \
        bne     t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
        SRL     t8, v0, POS; \
        SRL     t9, v1, POS; \
        andi    t8, t8, 0xff; \
        beq     t8, zero, L(wexit89); \
        andi    t9, t9, 0xff; \
        bne     t8, t9, L(wexit89)
#endif

L(worddiff):
#ifdef USE_CLZ
        SUBU    t0, v0, t8
        nor     t1, v0, t9
        and     t1, t0, t1
        xor     t0, v0, v1
        or      t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        wsbh    t0, t0
        rotr    t0, t0, 16
# endif
        clz     t1, t0
        and     t1, 0xf8
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
        neg     t1
        addu    t1, 24
# endif
        rotrv   v0, v0, t1
        rotrv   v1, v1, t1
        and     v0, v0, 0xff
        and     v1, v1, 0xff
        j       ra
        SUBU    v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
        andi    t0, v0, 0xff
        beq     t0, zero, L(wexit01)
        andi    t1, v1, 0xff
        bne     t0, t1, L(wexit01)
        EXT_COMPARE89(8)
        EXT_COMPARE01(16)
#ifndef __mips64
        SRL     t8, v0, 24
        SRL     t9, v1, 24
#else
        EXT_COMPARE89(24)
        EXT_COMPARE01(32)
        EXT_COMPARE89(40)
        EXT_COMPARE01(48)
        SRL     t8, v0, 56
        SRL     t9, v1, 56
#endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
#ifdef __mips64
        SRL     t0, v0, 56
        beq     t0, zero, L(wexit01)
        SRL     t1, v1, 56
        bne     t0, t1, L(wexit01)
        EXT_COMPARE89(48)
        EXT_COMPARE01(40)
        EXT_COMPARE89(32)
        EXT_COMPARE01(24)
#else
        SRL     t0, v0, 24
        beq     t0, zero, L(wexit01)
        SRL     t1, v1, 24
        bne     t0, t1, L(wexit01)
#endif
        EXT_COMPARE89(16)
        EXT_COMPARE01(8)

        andi    t8, v0, 0xff
        andi    t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
        j       ra
        SUBU    v0, t8, t9
L(wexit01):
        j       ra
        SUBU    v0, t0, t1
#endif /* USE_CLZ */

L(byteloop):
        beq     a2, zero, L(returnzero)
        SUBU    a2, a2, 1
        BYTECMP01(0)
        nop
        beq     a2, zero, L(returnzero)
        SUBU    a2, a2, 1
        BYTECMP89(1)
        nop
        beq     a2, zero, L(returnzero)
        SUBU    a2, a2, 1
        BYTECMP01(2)
        nop
        beq     a2, zero, L(returnzero)
        SUBU    a2, a2, 1
        BYTECMP89(3)
        PTR_ADDIU a0, a0, 4
        b       L(byteloop)
        PTR_ADDIU a1, a1, 4

L(bexit01):
        j       ra
        SUBU    v0, v0, v1
L(bexit89):
        j       ra
        SUBU    v0, t8, t9

        .set    at
        .set    reorder

END(STRNCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRNCMP_NAME)
# endif
#endif