/*
 * Copyright (c) 2014
 *      Imagination Technologies Limited.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
 *    contributors may be used to endorse or promote products derived from
 *    this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#ifdef __ANDROID__
# include <private/bionic_asm.h>
#elif _LIBC
# include <sysdep.h>
# include <regdef.h>
# include <sys/asm.h>
#elif _COMPILING_NEWLIB
# include "machine/asm.h"
# include "machine/regdef.h"
#else
# include <regdef.h>
# include <sys/asm.h>
#endif

/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NUL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.

   DELAY_READ is what fills the branch delay slot after each word compare:
   a nop by default, or nothing at all when ENABLE_READAHEAD is set, in
   which case the next word's first load lands in the delay slot.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.
   NOTE(review): this fallback picks daddiu for every non-o32 ABI, which
   includes n32; confirm that is what is wanted for 32-bit pointers.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU	addiu
# else
#  define PTR_ADDIU	daddiu
# endif
#endif

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif

/* STRCMP_NAME: compare two NUL-terminated byte strings.

   In:       a0, a1 = addresses of the two strings.
   Out:      v0 = 0 when the strings are equal, otherwise the difference
	     (first string minus second) of the first pair of bytes that
	     differ, each byte taken as an unsigned value.
   Clobbers: a0, a1, v0, v1, t0, t1, t8, t9.

   When both addresses are 4-byte aligned the strings are compared a word
   at a time (eight words per loop iteration); otherwise they are compared
   a byte at a time (eight bytes per loop iteration).

   The body is assembled under ".set noreorder": the instruction written
   right after every branch or jump is its delay slot and executes whether
   or not the branch is taken.  */
#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	or	t0, a0, a1
	andi	t0, t0, 0x3		/* Low two bits of either pointer set?  */
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  (The lui below also sits
   in the delay slot of the branch above; t8 is not live on the byte
   path until it is reloaded there, so that is harmless.)  */

	lui	t8, 0x0101
	ori	t8, t8, 0x0101		/* t8 = 0x01010101  */
	lui	t9, 0x7f7f
	ori	t9, t9, 0x7f7f		/* t9 = 0x7f7f7f7f  */

/* Compare one word of each string.  Leaves to L(worddiff) when the words
   differ (v0/v1 hold the two words) and to L(returnzero) when they are
   equal and contain a zero byte.  The zero test is
   (w - 0x01010101) & ~(w | 0x7f7f7f7f), which is nonzero exactly when
   some byte of w is zero.  The macro ends on a branch, so whatever
   follows the macro invocation is that branch's delay slot.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32		/* Delay slot of the last STRCMP32.  */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* Delay slot.  */

L(returnzero):
	j	ra
	move	v0, zero		/* Delay slot: strings are equal.  */

/* v0 and v1 hold two words that differ.  Find the first byte position (in
   memory order) that is either a NUL in v0 or differs between v0 and v1,
   and return the difference of the bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* The borrow-based zero test can also flag a 0x01 byte that sits at
	   higher significance than a real zero byte.  On little endian that
	   is later in memory than the real zero, so the first flagged byte
	   in memory order is still correct.  */
	subu	t0, v0, t8
	nor	t1, v0, t9
	and	t1, t0, t1
# else
	/* On big endian a falsely flagged 0x01 byte would come earlier in
	   memory than the real zero and could hide a difference lying
	   between the two, so build an exact mask with no carries between
	   bytes: 0x80 in precisely the bytes of v0 that are zero.  */
	and	t0, v0, t9
	addu	t0, t0, t9
	or	t0, t0, v0
	nor	t1, t0, t9
# endif
	xor	t0, v0, v1
	or	t0, t0, t1		/* Differing bits | zero-byte marks.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	wsbh	t0, t0			/* Byte-swap so the first byte in  */
	rotr	t0, t0, 16		/* memory becomes the most significant.  */
# endif
	clz	t1, t0
	and	t1, 0xf8		/* 8 * index of the byte of interest.  */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	neg	t1
	addu	t1, 24			/* Rotate count = 24 - 8 * index.  */
# endif
	rotrv	v0, v0, t1		/* Bring that byte to bits 7..0.  */
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1		/* Delay slot.  */
#else /* USE_CLZ */
	/* Walk the four byte lanes in memory order, alternating between the
	   t0/t1 and t8/t9 register pairs.  Each bne has the first shift of
	   the next lane in its delay slot.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

	/* The fourth lane falls through to here with its bytes in t8/t9.  */
L(wexit89):
	j	ra
	subu	v0, t8, t9		/* Delay slot.  */
L(wexit01):
	j	ra
	subu	v0, t0, t1		/* Delay slot.  */
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).

   Each macro ends on a 'bne' whose delay slot is the first instruction
   after the macro invocation.  The two variants differ only in the register
   pair they load into.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop; \
	bne	t8, t9, L(bexit89)

L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8		/* Delay slot of the last BYTECMP89.  */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* Delay slot.  */

L(bexit01):
	j	ra
	subu	v0, v0, v1		/* Delay slot.  */
L(bexit89):
	j	ra
	subu	v0, t8, t9		/* Delay slot.  */

	.set	at
	.set	reorder

END(STRCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif