1/*
2 * Copyright (c) 2017 Imagination Technologies.
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *      * Redistributions of source code must retain the above copyright
11 *        notice, this list of conditions and the following disclaimer.
12 *      * Redistributions in binary form must reproduce the above copyright
13 *        notice, this list of conditions and the following disclaimer
14 *        in the documentation and/or other materials provided with
15 *        the distribution.
16 *      * Neither the name of Imagination Technologies nor the names of its
17 *        contributors may be used to endorse or promote products derived
18 *        from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#ifdef __ANDROID__
34# include <private/bionic_asm.h>
35#elif _LIBC
36# include <sysdep.h>
37# include <regdef.h>
38# include <sys/asm.h>
39#elif _COMPILING_NEWLIB
40# include "machine/asm.h"
41# include "machine/regdef.h"
42#else
43# include <regdef.h>
44# include <sys/asm.h>
45#endif
46
/* Word-size abstraction: on MIPS64 the main loops operate on 64-bit
   doublewords (ld/dext/dsrl/dsll/dsubu), on MIPS32 on 32-bit words,
   so the same code below serves both ABIs.  NSIZE is the word size
   in bytes.  */
#if __mips64
# define NSIZE 8
# define LW ld
# define EXT dext
# define SRL dsrl
# define SLL dsll
# define SUBU dsubu
#else
# define NSIZE 4
# define LW lw
# define EXT ext
# define SRL srl
# define SLL sll
# define SUBU subu
#endif
62
/* Technically strcmp should not read past the end of the strings being
   compared.  We will read a full word that may contain excess bits beyond
   the NULL string terminator but unless ENABLE_READAHEAD is set, we will not
   read the next word after the end of string.  Setting ENABLE_READAHEAD will
   improve performance but is technically illegal based on the definition of
   strcmp.  */
/* Each STRCMPW step below ends in a branch; with ENABLE_READAHEAD the
   branch delay slot is filled by the next word load (which may read one
   word past the terminator), otherwise DELAY_READ supplies a nop.  */
#ifdef ENABLE_READAHEAD
# define DELAY_READ
#else
# define DELAY_READ nop
#endif

/* Testing on a little endian machine showed using CLZ was a
   performance loss, so we are not turning it on by default.  */
#if defined(ENABLE_CLZ) && (__mips_isa_rev > 1)
# define USE_CLZ
#endif

/* Some asm.h files do not have the L macro definition.  */
#ifndef L
# if _MIPS_SIM == _ABIO32
#  define L(label) $L ## label
# else
#  define L(label) .L ## label
# endif
#endif

/* Some asm.h files do not have the PTR_ADDIU macro definition.
   O32 uses 32-bit pointer arithmetic (addiu); the 64-bit ABIs need
   daddiu.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM == _ABIO32
#  define PTR_ADDIU       addiu
# else
#  define PTR_ADDIU       daddiu
# endif
#endif

/* It might seem better to do the 'beq' instruction between the two 'lbu'
   instructions so that the nop is not needed but testing showed that this
   code is actually faster (based on glibc strcmp test).  */
/* Compare one byte from each string at OFFSET; exit to bexit01 if the
   a0 byte is NUL or the bytes differ.  Uses the v0/v1 register pair.  */
#define BYTECMP01(OFFSET) \
    lbu v0, OFFSET(a0); \
    lbu v1, OFFSET(a1); \
    beq v0, zero, L(bexit01); \
    nop; \
    bne v0, v1, L(bexit01)

/* Same as BYTECMP01 but holds its bytes in the distinct t8/t9 pair, so
   byteloop can alternate the two macros without register conflicts.  */
#define BYTECMP89(OFFSET) \
    lbu t8, OFFSET(a0); \
    lbu t9, OFFSET(a1); \
    beq t8, zero, L(bexit89); \
    nop;    \
    bne t8, t9, L(bexit89)

/* Allow the routine to be named something else if desired.  */
#ifndef STRCMP_NAME
# define STRCMP_NAME strcmp
#endif
120
/*----------------------------------------------------------------------
 * int STRCMP_NAME(const char *a0, const char *a1)
 *
 * In:       a0, a1 = pointers to NUL-terminated strings
 * Out:      v0     = difference of the first mismatching byte pair
 *                    (negative / zero / positive, as for C strcmp)
 * Clobbers: v0, v1, a2, a3, t0-t3, t8, t9
 *
 * Strategy: byte-compare until a1 is NSIZE-aligned, then compare one
 * word per step.  If a0 is also aligned, use L(wordloop); otherwise
 * L(uwordloop) reads aligned words from a2 (a0 rounded down) and
 * shift-merges adjacent pairs to reconstruct the words at a0.
 *
 * .set noreorder is in effect: the instruction immediately after every
 * branch is its delay slot and executes whether or not the branch is
 * taken.
 *--------------------------------------------------------------------*/
#ifdef __ANDROID__
LEAF(STRCMP_NAME, 0)
#else
LEAF(STRCMP_NAME)
#endif
    .set    nomips16
    .set    noreorder

    /* Phase 1: align a1.  t1 = a1 % NSIZE; t0 = NSIZE (set in the
       delay slot, reused below); if already aligned, skip ahead.  */
    andi t1, a1, (NSIZE - 1)
    beqz t1, L(exitalign)
    or   t0, zero, NSIZE
    SUBU t1, t0, t1 #process (NSIZE - 1) bytes at max

L(alignloop): #do by bytes until a1 aligned
    BYTECMP01(0)
    SUBU t1, t1, 0x1
    PTR_ADDIU a0, a0, 0x1
    bnez  t1, L(alignloop)
    PTR_ADDIU a1, a1, 0x1

L(exitalign):

/* string a1 is NSIZE byte aligned at this point. */

    /* Zero-byte detection constants: t8 = 0x01..01, t9 = 0x7f..7f.
       A word v contains a NUL byte iff (v - t8) & ~(v | t9) != 0.  */
    lui t8, 0x0101
    ori t8, 0x0101
    lui t9, 0x7f7f
    ori t9, 0x7f7f
#if __mips64
    /* Widen both constants to fill the 64-bit word.  */
    dsll t1, t8, 32
    or  t8, t1
    dsll t1, t9, 32
    or  t9, t1
#endif

    /* If a0 is aligned too, fall through into L(wordloop); otherwise
       take the shift-merge path.  t2 = a0 misalignment in bytes,
       t3 = NSIZE - t2 (both converted to bit counts at uloopenter),
       a2 = a0 rounded down to a word boundary.  */
    andi t2, a0, (NSIZE - 1) #check if a0 aligned
    SUBU t3, t0, t2 #t3 will be used as shifter
    bnez t2, L(uloopenter)
    SUBU a2, a0, t2 #bring back a0 to aligned position

/* Compare one aligned word from each string.  Mismatching words go to
   L(worddiff) for a byte-level diagnosis; a word pair that matches and
   contains a NUL goes to L(returnzero).  The NUL test computes
   (v0 - t8) & nor(v0, t9), i.e. (v0 - 0x01..01) & ~v0 & 0x80..80.  */
#define STRCMPW(OFFSET) \
    LW   v0, OFFSET(a0); \
    LW   v1, OFFSET(a1); \
    SUBU t0, v0, t8; \
    bne  v0, v1, L(worddiff); \
    nor  t1, v0, t9; \
    and  t0, t0, t1; \
    bne  t0, zero, L(returnzero);\

/* Both strings aligned: 8 words (8 * NSIZE bytes) per iteration.  */
L(wordloop):
    STRCMPW(0 * NSIZE)
    DELAY_READ
    STRCMPW(1 * NSIZE)
    DELAY_READ
    STRCMPW(2 * NSIZE)
    DELAY_READ
    STRCMPW(3 * NSIZE)
    DELAY_READ
    STRCMPW(4 * NSIZE)
    DELAY_READ
    STRCMPW(5 * NSIZE)
    DELAY_READ
    STRCMPW(6 * NSIZE)
    DELAY_READ
    STRCMPW(7 * NSIZE)
    PTR_ADDIU a0, a0, (8 * NSIZE)
    b   L(wordloop)
    PTR_ADDIU a1, a1, (8 * NSIZE)

/* Unaligned-a0 step: on entry v0 holds the aligned word at OFFSET(a2).
   Shift out the unwanted low bits (t2 bits), load the next aligned word
   into a3, and merge its low part in (shifted left by t3 bits) to form
   the word at OFFSET(a0); leaves v0 = a3 for the next step, so each
   iteration reads one aligned word ahead of the current offset.
   NOTE(review): the SRL/SLL merge direction matches little-endian byte
   order — confirm behavior on big-endian targets.  */
#define USTRCMPW(OFFSET) \
    LW  v1, OFFSET(a1); \
    SUBU    t0, v0, t8; \
    nor t1, v0, t9; \
    and t0, t0, t1; \
    bne t0, zero, L(worddiff); \
    SRL v0, t2; \
    LW  a3, (OFFSET + NSIZE)(a2); \
    SUBU    t0, v1, t8; \
    SLL t1, a3, t3; \
    or v0, v0, t1; \
    bne v0, v1, L(worddiff); \
    nor t1, v1, t9; \
    and t0, t0, t1; \
    bne t0, zero, L(returnzero); \
    move v0, a3;\

L(uloopenter):
    LW  v0, 0(a2)
    SLL t2, 3  #multiply by 8
    SLL t3, 3  #multiply by 8
    li  a3, -1 #all 1s
    SRL a3, t3
    or v0, a3 #replace with all 1s if zeros in unintended read

L(uwordloop):
    USTRCMPW(0 * NSIZE)
    USTRCMPW(1 * NSIZE)
    USTRCMPW(2 * NSIZE)
    USTRCMPW(3 * NSIZE)
    USTRCMPW(4 * NSIZE)
    USTRCMPW(5 * NSIZE)
    USTRCMPW(6 * NSIZE)
    USTRCMPW(7 * NSIZE)
    PTR_ADDIU a2, a2, (8 * NSIZE)
    b   L(uwordloop)
    PTR_ADDIU a1, a1, (8 * NSIZE)

/* Words matched and contained a NUL: the strings are equal.  */
L(returnzero):
    j   ra
    move    v0, zero

/* Extract the byte at bit position POS from both words and exit via
   wexit01/wexit89 if the a0 byte is NUL or the bytes differ.  The
   mips32r2+ form uses EXT; the fallback uses shift + mask.  The 01/89
   variants use disjoint register pairs so they can alternate.  */
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
    EXT t0, v0, POS, 8; \
    beq t0, zero, L(wexit01); \
    EXT t1, v1, POS, 8; \
    bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    EXT t8, v0, POS, 8; \
    beq t8, zero, L(wexit89); \
    EXT t9, v1, POS, 8; \
    bne t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
    SRL  t0, v0, POS; \
    SRL  t1, v1, POS; \
    andi t0, t0, 0xff; \
    beq  t0, zero, L(wexit01); \
    andi t1, t1, 0xff; \
    bne  t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    SRL  t8, v0, POS; \
    SRL  t9, v1, POS; \
    andi t8, t8, 0xff; \
    beq  t8, zero, L(wexit89); \
    andi t9, t9, 0xff; \
    bne  t8, t9, L(wexit89)
#endif

/* v0/v1 differ or contain a NUL.  Scan bytes in memory order (lowest
   address first) for the first zero or mismatching byte and return its
   difference.  */
L(worddiff):
#ifdef USE_CLZ
    /* Build a mask with the high bit set in every NUL or mismatching
       byte, byte-swap on little-endian so memory order is bit order,
       then use clz to find the first interesting byte.  */
    SUBU    t0, v0, t8
    nor t1, v0, t9
    and t1, t0, t1
    xor t0, v0, v1
    or  t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    wsbh    t0, t0
    rotr    t0, t0, 16
# endif
    clz t1, t0
    and t1, 0xf8                    #round down to a whole-byte bit count
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    neg t1
    addu    t1, 24
# endif
    /* Rotate the chosen byte into the low 8 bits of each word.  */
    rotrv   v0, v0, t1
    rotrv   v1, v1, t1
    and v0, v0, 0xff
    and v1, v1, 0xff
    j   ra
    SUBU    v0, v0, v1
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    /* Little-endian: memory order is byte 0 (bits 0-7) upward.  */
    andi    t0, v0, 0xff
    beq t0, zero, L(wexit01)
    andi    t1, v1, 0xff
    bne t0, t1, L(wexit01)
    EXT_COMPARE89(8)
    EXT_COMPARE01(16)
#ifndef __mips64
    SRL t8, v0, 24
    SRL t9, v1, 24
#else
    EXT_COMPARE89(24)
    EXT_COMPARE01(32)
    EXT_COMPARE89(40)
    EXT_COMPARE01(48)
    SRL t8, v0, 56
    SRL t9, v1, 56
#endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
    /* Big-endian: memory order is the most-significant byte downward.  */
#ifdef __mips64
    SRL t0, v0, 56
    beq t0, zero, L(wexit01)
    SRL t1, v1, 56
    bne t0, t1, L(wexit01)
    EXT_COMPARE89(48)
    EXT_COMPARE01(40)
    EXT_COMPARE89(32)
    EXT_COMPARE01(24)
#else
    SRL t0, v0, 24
    beq t0, zero, L(wexit01)
    SRL t1, v1, 24
    bne t0, t1, L(wexit01)
#endif
    EXT_COMPARE89(16)
    EXT_COMPARE01(8)

    andi    t8, v0, 0xff
    andi    t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

/* Return difference of the byte pair held in t8/t9 or t0/t1.  */
L(wexit89):
    j   ra
    SUBU    v0, t8, t9
L(wexit01):
    j   ra
    SUBU    v0, t0, t1
#endif /* USE_CLZ */

/* Byte-at-a-time compare, 8 bytes per iteration, alternating register
   pairs.  NOTE(review): no branch to L(byteloop) is visible in this
   file — possibly dead code retained from an earlier version of the
   routine; confirm before removing.  */
L(byteloop):
    BYTECMP01(0)
    BYTECMP89(1)
    BYTECMP01(2)
    BYTECMP89(3)
    BYTECMP01(4)
    BYTECMP89(5)
    BYTECMP01(6)
    BYTECMP89(7)
    PTR_ADDIU a0, a0, 8
    b   L(byteloop)
    PTR_ADDIU a1, a1, 8

/* Return difference of the byte pair held in v0/v1 or t8/t9.  */
L(bexit01):
    j   ra
    SUBU    v0, v0, v1
L(bexit89):
    j   ra
    SUBU    v0, t8, t9

    /* Restore default assembler modes.  */
    .set    at
    .set    reorder

END(STRCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRCMP_NAME)
# endif
#endif
363