1/*
2 * Copyright (c) 2014
3 *      Imagination Technologies Limited.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 *    notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 *    notice, this list of conditions and the following disclaimer in the
12 *    documentation and/or other materials provided with the distribution.
13 * 3. Neither the name of the MIPS Technologies, Inc., nor the names of its
14 *    contributors may be used to endorse or promote products derived from
15 *    this software without specific prior written permission.
16 *
17 * THIS SOFTWARE IS PROVIDED BY IMAGINATION TECHNOLOGIES LIMITED ``AS IS'' AND
18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 * ARE DISCLAIMED. IN NO EVENT SHALL IMAGINATION TECHNOLOGIES LIMITED BE LIABLE
21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27 * SUCH DAMAGE.
28 */
29
30#ifdef __ANDROID__
31# include <private/bionic_asm.h>
32#elif _LIBC
33# include <sysdep.h>
34# include <regdef.h>
35# include <sys/asm.h>
36#elif _COMPILING_NEWLIB
37# include "machine/asm.h"
38# include "machine/regdef.h"
39#else
40# include <regdef.h>
41# include <sys/asm.h>
42#endif
43
/* Strictly speaking strcmp must not access memory beyond the end of the
   strings it compares.  The word loop always loads whole words, so it may
   pick up bytes that follow the NUL terminator inside the same word, but it
   will not load the word after the one holding the terminator unless
   ENABLE_READAHEAD is defined.  Defining ENABLE_READAHEAD improves
   performance but is technically illegal based on the definition of strcmp.

   DELAY_READ is placed after each word comparison in the unrolled loop: it
   is a nop by default and expands to nothing when ENABLE_READAHEAD is
   defined.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
55
/* The CLZ based difference search is opt-in: measurements on a little
   endian machine showed it to be slower, so it is only selected when
   ENABLE_CLZ is defined and __mips_isa_rev is greater than 1.  */
#ifdef ENABLE_CLZ
# if __mips_isa_rev >= 2
#  define USE_CLZ
# endif
#endif
61
/* Not every asm.h provides the L macro, so supply it when it is missing.
   Labels get a $L prefix when _MIPS_SIM is _ABIO32 and a .L prefix
   otherwise.  */
#if !defined(L)
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif
70
/* Some asm.h files do not have the PTR_ADDIU macro definition.  Choose the
   add-immediate that matches the pointer width: O32 and any other ABI whose
   pointers are 32 bits wide (for example N32) use addiu, everything else
   uses daddiu.  _MIPS_SZPTR is the compiler-provided pointer size in bits;
   if it is not defined it evaluates to 0 here and only the _MIPS_SIM test
   decides.  */
#ifndef PTR_ADDIU
# if (_MIPS_SIM == _ABIO32) || (_MIPS_SZPTR == 32)
#  define PTR_ADDIU       addiu
# else
#  define PTR_ADDIU       daddiu
# endif
#endif
79
/* The routine is called strcmp unless the includer has already picked a
   different name by defining STRCMP_NAME.  */
#if !defined(STRCMP_NAME)
# define STRCMP_NAME strcmp
#endif
84
/* STRCMP_NAME: compare the NUL-terminated strings at a0 and a1.

   In:   a0, a1 = addresses of the two strings.
   Out:  v0 = 0 when the strings are equal, otherwise the byte from the a0
	 string minus the byte from the a1 string at the first position
	 where they differ (both bytes zero-extended).
   Uses: v1, t0, t1, t8, t9 as scratch.  The stack is not touched.

   If both pointers are 4-byte aligned the strings are compared a word at
   a time (eight words per loop iteration); otherwise they are compared a
   byte at a time (eight bytes per loop iteration).

   The body is assembled with .set noreorder: the instruction written after
   each branch or jump is its delay slot and executes whether or not the
   branch is taken.  Several macros below end with a branch, so the delay
   slot is whatever follows the macro invocation.  */
#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
	.set	nomips16
	.set	noreorder

	/* Use the byte loop if either pointer has one of its low two bits
	   set.  The lui below sits in the delay slot and so also runs when
	   the branch is taken; the byte loop reloads t8 before using it.  */
	or	t0, a0, a1
	andi	t0,0x3
	bne	t0, zero, L(byteloop)

/* Both strings are 4 byte aligned at this point.  */

	lui	t8, 0x0101
	ori	t8, t8, 0x0101		/* t8 = 0x01010101 */
	lui	t9, 0x7f7f
	ori	t9, 0x7f7f		/* t9 = 0x7f7f7f7f */

/* Compare the words at OFFSET.  Control leaves to L(worddiff) when the two
   words differ and to L(returnzero) when they are equal and contain a zero
   byte: (v0 - 0x01010101) & ~(v0 | 0x7f7f7f7f) is nonzero exactly when some
   byte of v0 is zero.  The nor is the delay slot of the first branch; the
   delay slot of the final branch is supplied by whatever follows the
   macro.  */
#define STRCMP32(OFFSET) \
	lw	v0, OFFSET(a0); \
	lw	v1, OFFSET(a1); \
	subu	t0, v0, t8; \
	bne	v0, v1, L(worddiff); \
	nor	t1, v0, t9; \
	and	t0, t0, t1; \
	bne	t0, zero, L(returnzero)

L(wordloop):
	STRCMP32(0)
	DELAY_READ
	STRCMP32(4)
	DELAY_READ
	STRCMP32(8)
	DELAY_READ
	STRCMP32(12)
	DELAY_READ
	STRCMP32(16)
	DELAY_READ
	STRCMP32(20)
	DELAY_READ
	STRCMP32(24)
	DELAY_READ
	STRCMP32(28)
	PTR_ADDIU a0, a0, 32		/* delay slot of the last STRCMP32 */
	b	L(wordloop)
	PTR_ADDIU a1, a1, 32		/* delay slot */

/* Equal words containing the terminator: the strings are equal.  */
L(returnzero):
	j	ra
	move	v0, zero

/* v0 and v1 hold the first pair of words that differ.  Find the first byte
   (in memory order) that either differs or is the terminator of the a0
   string and return the difference of the two bytes at that position.  */
L(worddiff):
#ifdef USE_CLZ
	/* t1 = 0x80 in every byte lane of v0 that is zero, 0 elsewhere:
	   ~(((v0 & 0x7f7f7f7f) + 0x7f7f7f7f) | v0 | 0x7f7f7f7f).
	   This mask has to be exact per byte.  The cheaper test used in the
	   word loop can, through a borrow out of a zero byte, also flag a
	   0x01 byte in the next more significant lane.  On big-endian that
	   lane is earlier in memory, so clz would select an equal 0x01 byte
	   ahead of the real terminator and return 0 for strings that
	   differ.  */
	and	t0, v0, t9
	addu	t0, t0, t9
	or	t0, t0, v0
	nor	t1, t0, t9
	xor	t0, v0, v1		/* nonzero lanes = differing bytes */
	or	t0, t0, t1		/* ... or zero bytes of v0 */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	/* Reverse the byte order so that the first byte in memory becomes
	   the most significant one for clz.  */
	wsbh	t0, t0
	rotr	t0, t0, 16
# endif
	clz	t1, t0
	and	t1, 0xf8		/* round down to a multiple of 8 bits */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	/* t1 = 24 - t1: right rotation that brings the byte to bits 7..0.  */
	neg	t1
	addu	t1, 24
# endif
	rotrv	v0, v0, t1
	rotrv	v1, v1, t1
	and	v0, v0, 0xff
	and	v1, v1, 0xff
	j	ra
	subu	v0, v0, v1
#else /* USE_CLZ */
	/* Check the bytes one at a time in memory order.  Consecutive steps
	   alternate between the register pairs t0/t1 and t8/t9 because the
	   first instruction of each step is the delay slot of the previous
	   step's bne and must not overwrite the pair that the branch target
	   subtracts.  The last byte needs no test: the words differ and the
	   first three bytes were equal and nonzero.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
	andi	t0, v0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, v1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 8
	srl	t9, v1, 8
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 16
	srl	t1, v1, 16
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 24
	srl	t9, v1, 24
# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
	srl	t0, v0, 24
	beq	t0, zero, L(wexit01)
	srl	t1, v1, 24
	bne	t0, t1, L(wexit01)

	srl	t8, v0, 16
	srl	t9, v1, 16
	andi	t8, t8, 0xff
	beq	t8, zero, L(wexit89)
	andi	t9, t9, 0xff
	bne	t8, t9, L(wexit89)

	srl	t0, v0, 8
	srl	t1, v1, 8
	andi	t0, t0, 0xff
	beq	t0, zero, L(wexit01)
	andi	t1, t1, 0xff
	bne	t0, t1, L(wexit01)

	andi	t8, v0, 0xff
	andi	t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
	j	ra
	subu	v0, t8, t9
L(wexit01):
	j	ra
	subu	v0, t0, t1
#endif /* USE_CLZ */

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).

   BYTECMP01 and BYTECMP89 compare the bytes at OFFSET using v0/v1 and
   t8/t9 respectively, and leave to the matching exit label when the a0
   byte is zero or the bytes differ.  Each macro ends with a branch whose
   delay slot is the first instruction after the invocation; the two
   register pairs alternate so that this instruction (the next lbu) does
   not overwrite the pair that the exit label subtracts.  */
#define BYTECMP01(OFFSET) \
	lbu	v0, OFFSET(a0); \
	lbu	v1, OFFSET(a1); \
	beq	v0, zero, L(bexit01); \
	nop; \
	bne	v0, v1, L(bexit01)

#define BYTECMP89(OFFSET) \
	lbu	t8, OFFSET(a0); \
	lbu	t9, OFFSET(a1); \
	beq	t8, zero, L(bexit89); \
	nop;	\
	bne	t8, t9, L(bexit89)

L(byteloop):
	BYTECMP01(0)
	BYTECMP89(1)
	BYTECMP01(2)
	BYTECMP89(3)
	BYTECMP01(4)
	BYTECMP89(5)
	BYTECMP01(6)
	BYTECMP89(7)
	PTR_ADDIU a0, a0, 8		/* delay slot of the last BYTECMP89 */
	b	L(byteloop)
	PTR_ADDIU a1, a1, 8		/* delay slot */

L(bexit01):
	j	ra
	subu	v0, v0, v1
L(bexit89):
	j	ra
	subu	v0, t8, t9

	.set	at
	.set	reorder

END(STRCMP_NAME)
/* Emitted only when __ANDROID__ is not defined and _LIBC is defined.  */
#if !defined(__ANDROID__) && defined(_LIBC)
libc_hidden_builtin_def (STRCMP_NAME)
#endif
261