1/*
2 * Copyright (c) 2017 Imagination Technologies.
3 *
4 * All rights reserved.
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 *
10 *      * Redistributions of source code must retain the above copyright
11 *        notice, this list of conditions and the following disclaimer.
12 *      * Redistributions in binary form must reproduce the above copyright
13 *        notice, this list of conditions and the following disclaimer
14 *        in the documentation and/or other materials provided with
15 *        the distribution.
16 *      * Neither the name of Imagination Technologies nor the names of its
17 *        contributors may be used to endorse or promote products derived
18 *        from this software without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31 */
32
33#ifdef __ANDROID__
34# include <private/bionic_asm.h>
35#elif _LIBC
36# include <sysdep.h>
37# include <regdef.h>
38# include <sys/asm.h>
39#elif _COMPILING_NEWLIB
40# include "machine/asm.h"
41# include "machine/regdef.h"
42#else
43# include <regdef.h>
44# include <sys/asm.h>
45#endif
46
/* Native word size and the matching instruction mnemonics.  NSIZE is the
   number of bytes covered by one word compare; the remaining macros pick
   the doubleword form of each instruction on mips64 and the word form
   otherwise, so the code below can be written once for both.  */
#if __mips64
# define NSIZE  8
# define LW     ld      /* aligned load            */
# define LWL    ldl     /* unaligned load, left    */
# define LWR    ldr     /* unaligned load, right   */
# define SRL    dsrl    /* logical shift right     */
# define SUBU   dsubu   /* subtract, no overflow   */
# define EXT    dext    /* bit-field extract       */
#else
# define NSIZE  4
# define LW     lw
# define LWL    lwl
# define LWR    lwr
# define SRL    srl
# define SUBU   subu
# define EXT    ext
#endif
64
/* Technically strncmp should not read past the end of the strings being
   compared.  The word loops load a full word that may contain excess
   bytes beyond the NUL string terminator.  DELAY_READ is what sits in the
   branch delay slot of the end-of-string test between two unrolled word
   compares: as a nop it keeps the load of the next word out of that slot,
   so the next word is only read once the current one is known to hold no
   terminator.  Defining ENABLE_READAHEAD makes DELAY_READ empty, letting
   the next load fill the slot; that improves performance but is
   technically illegal based on the definition of strncmp.  */
#ifndef ENABLE_READAHEAD
# define DELAY_READ nop
#else
# define DELAY_READ
#endif
76
/* Locating the differing byte with CLZ is opt-in: testing on a little
   endian machine showed it to be a performance loss, so it is not turned
   on by default.  USE_CLZ is defined only when ENABLE_CLZ is requested,
   __mips_isa_rev is greater than 1 and the target is not mips64.  */
#ifdef ENABLE_CLZ
# if (__mips_isa_rev > 1) && (!__mips64)
#  define USE_CLZ
# endif
#endif
82
/* Fallback definitions for asm.h variants that do not provide them.  */

/* L(label): local label name, $L<label> under the O32 ABI and .L<label>
   under every other ABI.  */
#ifndef L
# if _MIPS_SIM != _ABIO32
#  define L(label) .L ## label
# else
#  define L(label) $L ## label
# endif
#endif

/* PTR_ADDIU: add-immediate used for pointer updates, addiu under the O32
   ABI and daddiu under every other ABI.  */
#ifndef PTR_ADDIU
# if _MIPS_SIM != _ABIO32
#  define PTR_ADDIU       daddiu
# else
#  define PTR_ADDIU       addiu
# endif
#endif
100
/* Compare one byte of each string.

   BYTECMP(OFFSET, LHS, RHS, EXIT) loads the byte at OFFSET(a0) into LHS
   and the byte at OFFSET(a1) into RHS, then branches to L(EXIT) when LHS
   is zero or when the two bytes differ.  The delay slot of the first
   branch is an explicit nop; the delay slot of the final bne is NOT
   filled here, so the instruction that follows the macro invocation runs
   in it whether or not the branch is taken.

   It might seem better to put the 'beq' between the two 'lbu'
   instructions so that the nop is not needed, but testing showed that
   this ordering is actually faster (based on glibc strcmp test).  */
#define BYTECMP(OFFSET, LHS, RHS, EXIT) \
    lbu LHS, OFFSET(a0); \
    lbu RHS, OFFSET(a1); \
    beq LHS, zero, L(EXIT); \
    nop; \
    bne LHS, RHS, L(EXIT)

/* Variant working in v0/v1 and leaving through L(bexit01).  */
#define BYTECMP01(OFFSET) BYTECMP(OFFSET, v0, v1, bexit01)

/* Variant working in t8/t9 and leaving through L(bexit89).  */
#define BYTECMP89(OFFSET) BYTECMP(OFFSET, t8, t9, bexit89)
117
/* Allow the routine to be named something else if desired.  */
#ifndef STRNCMP_NAME
# define STRNCMP_NAME strncmp
#endif

/* STRNCMP_NAME: compare at most a2 bytes of the strings at a0 and a1.

   In:    a0, a1 = the two strings, a2 = maximum number of bytes to compare
   Out:   v0 = 0 when no difference is found before a NUL byte or before
               the count runs out; otherwise the byte from a0 minus the
               byte from a1 (both zero-extended) at the first position
               where they differ or where the a0 byte is NUL
   Uses:  v0, v1, t0, t1, t8, t9 and modifies a0-a2; leaf, no stack.

   Outline: short counts go straight to the byte loop.  Otherwise bytes
   are compared until a1 is NSIZE aligned, then whole words are compared
   (eight per iteration, then one per iteration), and whatever is left is
   finished by the byte loop.  The count tests in the word loops are
   signed, so a count with the sign bit set is simply left to the byte
   loop.

   The code runs under .set noreorder: the instruction written after a
   branch is its delay slot and executes whether or not the branch is
   taken.  */
#ifdef __ANDROID__
LEAF(STRNCMP_NAME, 0)
#else
LEAF(STRNCMP_NAME)
#endif
    .set    nomips16
    .set    noreorder

    /* Process by bytes if the count is less than (2 * NSIZE).  SRL, not a
       bare srl, so that mips64 looks at all 64 bits of the count.  */
    SRL  t0, a2, (2 + NSIZE / 4)
    beqz t0, L(byteloop)
    andi t1, a1, (NSIZE - 1)        /* delay slot: misalignment of a1 */
    beqz t1, L(exitalign)
    or   t0, zero, NSIZE            /* delay slot */
    SUBU t1, t0, t1                 /* bytes until a1 is aligned, (NSIZE - 1) at max */
    SUBU a2, a2, t1                 /* dec count by t1 */

L(alignloop):                       /* do by bytes until a1 aligned */
    BYTECMP01(0)
    SUBU t1, t1, 0x1                /* delay slot of the bne ending BYTECMP01 */
    PTR_ADDIU a0, a0, 0x1
    bne  t1, zero, L(alignloop)
    PTR_ADDIU a1, a1, 0x1           /* delay slot */

L(exitalign):

/* string a1 is NSIZE byte aligned at this point. */
/* NOTE(review): t8/t9 are not set up when __mips1 is defined, yet the
   aligned word loop below still uses them in that configuration --
   confirm whether this guard is intended.  */
#ifndef __mips1
    /* t8 = 0x01 in every byte, t9 = 0x7f in every byte: the constants of
       the zero-byte test performed by STRCMPW / USTRCMPW.  */
    lui t8, 0x0101
    ori t8, 0x0101
    lui t9, 0x7f7f
    ori t9, 0x7f7f
#if __mips64
    dsll t0, t8, 32                 /* replicate into the upper 32 bits */
    or  t8, t0
    dsll t1, t9, 32
    or  t9, t1
#endif
#endif

/* hardware or software alignment not supported for mips1
   rev6 archs have h/w unaligned support
   remaining archs need to be implemented with unaligned instructions.
   The delay slot of the bne below is the first SUBU of L(wordloop); it
   only writes t1, which every branch target recomputes.  */

#if __mips1
    andi t0, a0, (NSIZE - 1)
    bne  t0, zero, L(byteloop)
#elif __mips_isa_rev < 6
    andi t0, a0, (NSIZE - 1)
    bne  t0, zero, L(uwordloop)
#endif

/* Compare the words at OFFSET(a0) and OFFSET(a1).  Branches to
   L(worddiff) when they differ (v0/v1 hold the two words) and to
   L(returnzero) when they are equal and the zero-byte test
   (v0 - t8) & ~(v0 | t9) reports a NUL byte in the word.  The delay slot
   of the final bne is supplied by whatever follows the invocation.  */
#define STRCMPW(OFFSET) \
    LW   v0, (OFFSET)(a0); \
    LW   v1, (OFFSET)(a1); \
    SUBU t0, v0, t8; \
    bne  v0, v1, L(worddiff); \
    nor  t1, v0, t9; \
    and  t0, t0, t1; \
    bne  t0, zero, L(returnzero);

/* Eight words per iteration while at least (8 * NSIZE) bytes remain.  */
L(wordloop):
    SUBU t1, a2, (8 * NSIZE)
    bltz t1, L(onewords)
    nop                             /* delay slot: no load here, a2 may already be 0 */
    STRCMPW(0 * NSIZE)
    DELAY_READ
    STRCMPW(1 * NSIZE)
    DELAY_READ
    STRCMPW(2 * NSIZE)
    DELAY_READ
    STRCMPW(3 * NSIZE)
    DELAY_READ
    STRCMPW(4 * NSIZE)
    DELAY_READ
    STRCMPW(5 * NSIZE)
    DELAY_READ
    STRCMPW(6 * NSIZE)
    DELAY_READ
    STRCMPW(7 * NSIZE)
    SUBU a2, a2, (8 * NSIZE)        /* delay slot of the last STRCMPW branch */
    PTR_ADDIU a0, a0, (8 * NSIZE)
    b   L(wordloop)
    PTR_ADDIU a1, a1, (8 * NSIZE)   /* delay slot */

/* One word per iteration while at least NSIZE bytes remain.  */
L(onewords):
    SUBU t1, a2, NSIZE
    bltz t1, L(byteloop)
    nop                             /* delay slot: no load here, a2 may already be 0 */
    STRCMPW(0)
    SUBU a2, a2, NSIZE              /* delay slot of the STRCMPW branch */
    PTR_ADDIU a0, a0, NSIZE
    b   L(onewords)
    PTR_ADDIU a1, a1, NSIZE         /* delay slot */

#if __mips_isa_rev < 6 && !__mips1
/* An unaligned word is assembled from two partial loads.  Which of the
   left/right instructions goes with the low address depends on the byte
   order: right-then-left on little-endian, left-then-right on
   big-endian.  */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
#  define ULOAD_FIRST LWR
#  define ULOAD_LAST  LWL
# else
#  define ULOAD_FIRST LWL
#  define ULOAD_LAST  LWR
# endif

/* Same as STRCMPW, but the word from a0 is read with the unaligned load
   pair; a1 is aligned and uses a plain load.  */
#define USTRCMPW(OFFSET) \
    ULOAD_FIRST v0, (OFFSET)(a0); \
    ULOAD_LAST  v0, (OFFSET + NSIZE - 1)(a0); \
    LW  v1, (OFFSET)(a1); \
    SUBU    t0, v0, t8; \
    bne v0, v1, L(worddiff); \
    nor t1, v0, t9; \
    and t0, t0, t1; \
    bne t0, zero, L(returnzero);

/* Unaligned-a0 counterpart of L(wordloop).  */
L(uwordloop):
    SUBU t1, a2, (8 * NSIZE)
    bltz t1, L(uonewords)
    nop                             /* delay slot: no load here, a2 may already be 0 */
    USTRCMPW(0 * NSIZE)
    DELAY_READ
    USTRCMPW(1 * NSIZE)
    DELAY_READ
    USTRCMPW(2 * NSIZE)
    DELAY_READ
    USTRCMPW(3 * NSIZE)
    DELAY_READ
    USTRCMPW(4 * NSIZE)
    DELAY_READ
    USTRCMPW(5 * NSIZE)
    DELAY_READ
    USTRCMPW(6 * NSIZE)
    DELAY_READ
    USTRCMPW(7 * NSIZE)
    SUBU a2, a2, (8 * NSIZE)        /* delay slot of the last USTRCMPW branch */
    PTR_ADDIU a0, a0, (8 * NSIZE)
    b   L(uwordloop)
    PTR_ADDIU a1, a1, (8 * NSIZE)   /* delay slot */

/* Unaligned-a0 counterpart of L(onewords).  */
L(uonewords):
    SUBU t1, a2, NSIZE
    bltz t1, L(byteloop)
    nop                             /* delay slot: no load here, a2 may already be 0 */
    USTRCMPW(0)
    SUBU a2, a2, NSIZE              /* delay slot of the USTRCMPW branch */
    PTR_ADDIU a0, a0, NSIZE
    b   L(uonewords)
    PTR_ADDIU a1, a1, NSIZE         /* delay slot */

#endif

/* Strings compare equal.  */
L(returnzero):
    j   ra
    move    v0, zero                /* delay slot */

/* EXT_COMPARE01 / EXT_COMPARE89 (POS): extract the byte at bit POS of v0
   and v1 into t0/t1 (resp. t8/t9) and branch to L(wexit01) (resp.
   L(wexit89)) when the v0 byte is zero or the two bytes differ.  The
   delay slot of the final bne is the next instruction in the stream; the
   two variants are used alternately so that this instruction only writes
   the register pair the taken exit does not read.  */
#if __mips_isa_rev > 1
#define EXT_COMPARE01(POS) \
    EXT t0, v0, POS, 8; \
    beq t0, zero, L(wexit01); \
    EXT t1, v1, POS, 8; \
    bne t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    EXT t8, v0, POS, 8; \
    beq t8, zero, L(wexit89); \
    EXT t9, v1, POS, 8; \
    bne t8, t9, L(wexit89)
#else
#define EXT_COMPARE01(POS) \
    SRL  t0, v0, POS; \
    SRL  t1, v1, POS; \
    andi t0, t0, 0xff; \
    beq  t0, zero, L(wexit01); \
    andi t1, t1, 0xff; \
    bne  t0, t1, L(wexit01)
#define EXT_COMPARE89(POS) \
    SRL  t8, v0, POS; \
    SRL  t9, v1, POS; \
    andi t8, t8, 0xff; \
    beq  t8, zero, L(wexit89); \
    andi t9, t9, 0xff; \
    bne  t8, t9, L(wexit89)
#endif

/* v0 and v1 hold two words that differ: find the first byte, in memory
   order, that is NUL in v0 or differs, and return the difference.  */
L(worddiff):
#ifdef USE_CLZ
    /* t0 = per-byte flags: zero-byte test of v0 OR-ed with v0 ^ v1.
       NOTE(review): on big-endian the borrow in the zero-byte test can
       also flag a 0x01 byte that precedes a real NUL byte -- confirm
       before enabling ENABLE_CLZ for that byte order.  */
    SUBU    t0, v0, t8
    nor t1, v0, t9
    and t1, t0, t1
    xor t0, v0, v1
    or  t0, t0, t1
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    wsbh    t0, t0                  /* byte-reverse so the first byte in */
    rotr    t0, t0, 16              /* memory becomes the most significant */
# endif
    clz t1, t0
    and t1, 0xf8                    /* round down to a multiple of 8 bits */
# if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
    neg t1                          /* t1 = 24 - t1: shift that brings the */
    addu    t1, 24                  /* flagged byte down to bits 7..0 */
# endif
    rotrv   v0, v0, t1
    rotrv   v1, v1, t1
    and v0, v0, 0xff
    and v1, v1, 0xff
    j   ra
    SUBU    v0, v0, v1              /* delay slot */
#else /* USE_CLZ */
# if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
    /* Little endian: first byte in memory is the least significant.  */
    andi    t0, v0, 0xff
    beq t0, zero, L(wexit01)
    andi    t1, v1, 0xff            /* delay slot */
    bne t0, t1, L(wexit01)
    EXT_COMPARE89(8)
    EXT_COMPARE01(16)
#ifndef __mips64
    SRL t8, v0, 24                  /* last byte: fall into L(wexit89) */
    SRL t9, v1, 24
#else
    EXT_COMPARE89(24)
    EXT_COMPARE01(32)
    EXT_COMPARE89(40)
    EXT_COMPARE01(48)
    SRL t8, v0, 56                  /* last byte: fall into L(wexit89) */
    SRL t9, v1, 56
#endif

# else /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */
    /* Big endian: first byte in memory is the most significant.  */
#ifdef __mips64
    SRL t0, v0, 56
    beq t0, zero, L(wexit01)
    SRL t1, v1, 56                  /* delay slot */
    bne t0, t1, L(wexit01)
    EXT_COMPARE89(48)
    EXT_COMPARE01(40)
    EXT_COMPARE89(32)
    EXT_COMPARE01(24)
#else
    SRL t0, v0, 24
    beq t0, zero, L(wexit01)
    SRL t1, v1, 24                  /* delay slot */
    bne t0, t1, L(wexit01)
#endif
    EXT_COMPARE89(16)
    EXT_COMPARE01(8)

    andi    t8, v0, 0xff            /* last byte: fall into L(wexit89) */
    andi    t9, v1, 0xff
# endif /* __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ */

L(wexit89):
    j   ra
    SUBU    v0, t8, t9              /* delay slot */
L(wexit01):
    j   ra
    SUBU    v0, t0, t1              /* delay slot */
#endif /* USE_CLZ */

/* Byte at a time, unrolled four times.  Before every byte the count is
   tested and then decremented in the delay slot of that test; the
   pointers advance by 4 once per iteration.  */
L(byteloop):
    beq a2, zero, L(returnzero)
    SUBU a2, a2, 1                  /* delay slot */
    BYTECMP01(0)
    nop                             /* delay slot of the bne ending BYTECMP01 */
    beq a2, zero, L(returnzero)
    SUBU a2, a2, 1
    BYTECMP89(1)
    nop
    beq a2, zero, L(returnzero)
    SUBU a2, a2, 1
    BYTECMP01(2)
    nop
    beq a2, zero, L(returnzero)
    SUBU a2, a2, 1
    BYTECMP89(3)
    PTR_ADDIU a0, a0, 4             /* delay slot of the bne ending BYTECMP89 */
    b   L(byteloop)
    PTR_ADDIU a1, a1, 4             /* delay slot */

L(bexit01):
    j   ra
    SUBU    v0, v0, v1              /* delay slot */
L(bexit89):
    j   ra
    SUBU    v0, t8, t9              /* delay slot */

    .set    at
    .set    reorder

END(STRNCMP_NAME)
#ifndef __ANDROID__
# ifdef _LIBC
libc_hidden_builtin_def (STRNCMP_NAME)
# endif
#endif
402