/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
30
/* Fallback definitions.  Each one is guarded, so it only takes effect when
   the including environment has not already supplied the macro.  */

/* L(label): spell LABEL with the .L prefix, which the assembler treats as
   a non-exported (file-local) symbol.  */
#ifndef L
# define L(label)	.L##label
#endif

/* Wrappers that map the cfi_* names onto the assembler's call-frame
   information directives of the same name.  */
#ifndef cfi_startproc
# define cfi_startproc	.cfi_startproc
#endif

#ifndef cfi_endproc
# define cfi_endproc	.cfi_endproc
#endif

#ifndef cfi_rel_offset
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#ifndef cfi_restore
# define cfi_restore(reg)	.cfi_restore reg
#endif

#ifndef cfi_adjust_cfa_offset
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): open a global function symbol NAME, aligned to 16 bytes,
   and start its CFI region.  */
#ifndef ENTRY
# define ENTRY(name)	\
	.type name,  @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and record the size of
   NAME as the distance from its label to this point.  */
#ifndef END
# define END(name)	\
	cfi_endproc;	\
	.size name,	.-name
#endif
69
/* CFI_PUSH(REG): unwind bookkeeping for a 4-byte push of REG.  The CFA
   offset grows by 4 and REG is recorded as saved at offset 0 from the
   current CFA register.  Emits no machine code.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* CFI_POP(REG): the inverse bookkeeping for a 4-byte pop of REG.  The CFA
   offset shrinks by 4 and REG is marked as holding its original value
   again.  Emits no machine code.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* PUSH / POP: the real pushl / popl instruction immediately followed by
   the matching CFI annotation, so the two can never get out of step.  */
#define PUSH(REG)	pushl REG; CFI_PUSH (REG)
#define POP(REG)	popl REG; CFI_POP (REG)
80
/* Offset from %esp, at function entry, of the first stack argument; the
   4 bytes at 0(%esp) hold the return address.  */
#define PARMS	4

/* STR1: offset of the first argument (the wide string pointer).
   STR2: offset of the second argument (the wide character to find).
   Both are used as displacements, e.g. STR2(%esp).  */
#define STR1  PARMS
#define STR2  STR1+4

	.text
/*
 * wchar_t *wcschr (const wchar_t *s, wchar_t c)
 *
 * Return a pointer to the first 32-bit element of S that equals C, or 0
 * (NULL) if a zero element is met first.  When C is itself zero, the
 * address of that zero element is returned.
 *
 * ABI:      32-bit x86, arguments on the stack: s at STR1(%esp),
 *           c at STR2(%esp); result in %eax.
 * Requires: SSE2.
 * Clobbers: %eax, %ecx, %edx, %xmm0, %xmm1, %xmm2, flags.
 *           %edi is used on the L(cross_cache) path only and is pushed
 *           and popped there.
 *
 * Register roles:
 *   %ecx   address of the 16-byte chunk being examined
 *   %xmm1  c replicated into all four dword lanes
 *   %xmm2  zero before each compare; after it, the lanes that hold 0
 *   %xmm0  chunk data; after the second compare, the lanes equal to c
 *   %edx   pmovmskb of %xmm2: 4 mask bits per lane holding 0
 *   %eax   pmovmskb of %xmm0: 4 mask bits per lane equal to c
 *
 * NOTE(review): the mask decoding works on whole 4-bit lane groups, so it
 * presumably relies on s being 4-byte aligned -- confirm with callers.
 */
ENTRY (wcschr)

	mov	STR1(%esp), %ecx	/* %ecx = s */
	movd	STR2(%esp), %xmm1	/* low lane of %xmm1 = c */

	mov	%ecx, %eax
	/* The two punpckldq steps copy c into all four lanes of %xmm1;
	   the pxor between them clears the zero-compare register.  */
	punpckldq %xmm1, %xmm1
	pxor	%xmm2, %xmm2
	punpckldq %xmm1, %xmm1

	/* An unaligned 16-byte load from s stays inside its 64-byte
	   aligned block only while (s & 63) <= 48.  Past that, use the
	   aligned-load path instead.  */
	and	$63, %eax
	cmp	$48, %eax
	ja	L(cross_cache)

	movdqu	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2		/* %xmm2 = lanes equal to 0 */
	pcmpeqd	%xmm1, %xmm0		/* %xmm0 = lanes equal to c */
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx		/* any 0 or c in this chunk? */
	jnz	L(matches)
	/* Nothing found, hence %xmm2 is still all zero.  Round %ecx down
	   so that the loop, which adds 16 before loading, continues with
	   aligned loads.  Lanes already checked may be checked again.  */
	and	$-16, %ecx
	jmp	L(loop)

	.p2align 4
L(cross_cache):
	PUSH	(%edi)
	mov	%ecx, %edi
	mov	%eax, %ecx
	and	$-16, %edi		/* %edi = s rounded down to 16 */
	and	$15, %ecx		/* %ecx = s & 15, used as shift count */
	movdqa	(%edi), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax

	/* Discard the mask bits that belong to bytes in front of s.
	   pmovmskb zero-extends, so these shifts bring in zero bits.  */
	sarl	%cl, %edx
	sarl	%cl, %eax
	test	%eax, %eax
	jz	L(unaligned_no_match)

	add	%edi, %ecx		/* %ecx = s; mask bit 0 now maps to (%ecx) */
	POP	(%edi)

	/* Inline copy of the decoding done at L(matches) and
	   L(match_case2).  */
	test	%edx, %edx
	jz	L(match_case1)
	test	%al, %al
	jz	L(match_high_case2)
	test	$15, %al
	jnz	L(match_case2_4)
	test	$15, %dl
	jnz	L(return_null)
	lea	4(%ecx), %eax
	ret

	/* L(unaligned_no_match) is reached from L(cross_cache) with %edi
	   still on the stack.  Re-describe that state to the unwinder,
	   since the POP above cancelled it for the code just before.  */
	CFI_PUSH (%edi)

	.p2align 4
L(unaligned_no_match):
	mov	%edi, %ecx		/* continue from the aligned chunk */
	POP	(%edi)

	test	%edx, %edx		/* zero element at or after s? */
	jnz	L(return_null)

	/* %xmm2 may still flag zero lanes located before s; the loop
	   needs it cleared.  */
	pxor	%xmm2, %xmm2

/* Loop start on aligned string.  */
/* Unrolled four times.  Each step advances %ecx by 16, loads an aligned
   chunk and builds both masks.  %xmm2 is all zero on entry to every step
   because the previous step found no zero lane.  */
	.p2align 4
L(loop):
	add	$16, %ecx
	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jnz	L(matches)
	add	$16, %ecx

	movdqa	(%ecx), %xmm0
	pcmpeqd	%xmm0, %xmm2
	pcmpeqd	%xmm1, %xmm0
	pmovmskb %xmm2, %edx
	pmovmskb %xmm0, %eax
	or	%eax, %edx
	jz	L(loop)

/* The chunk at (%ecx) holds a zero element, an element equal to c, or
   both.  %eax is the c mask; %edx was overwritten by the or above.  */
	.p2align 4
L(matches):
	pmovmskb %xmm2, %edx		/* rebuild the zero mask */
	test	%eax, %eax
	jz	L(return_null)		/* only a zero element: not found */
	test	%edx, %edx
	jz	L(match_case1)		/* c present, no zero element */

/* Both masks are non-zero: return the first lane equal to c unless a zero
   lane comes strictly before it.  */
	.p2align 4
L(match_case2):
	test	%al, %al
	jz	L(match_high_case2)	/* no c in lanes 0-1 */
	test	$15, %al
	jnz	L(match_case2_4)	/* c in lane 0 */
	test	$15, %dl
	jnz	L(return_null)		/* zero in lane 0, c only in lane 1 */
	lea	4(%ecx), %eax		/* c in lane 1 */
	ret

/* c is in lane 0.  */
	.p2align 4
L(match_case2_4):
	mov	%ecx, %eax
	ret

/* Both masks non-zero, c absent from lanes 0-1.  */
	.p2align 4
L(match_high_case2):
	test	%dl, %dl
	jnz	L(return_null)		/* zero in lane 0 or 1 comes first */
	test	$15, %ah
	jnz	L(match_case2_12)	/* c in lane 2 */
	test	$15, %dh
	jnz	L(return_null)		/* zero in lane 2, c only in lane 3 */
	lea	12(%ecx), %eax		/* c in lane 3 */
	ret

/* c is in lane 2.  */
	.p2align 4
L(match_case2_12):
	lea	8(%ecx), %eax
	ret

/* No zero element in the chunk: return the first lane equal to c.  */
	.p2align 4
L(match_case1):
	test	%al, %al
	jz	L(match_high_case1)	/* no c in lanes 0-1 */

	test	$0x01, %al
	jnz	L(exit0)		/* c in lane 0 */
	lea	4(%ecx), %eax		/* c in lane 1 */
	ret

	.p2align 4
L(match_high_case1):
	test	$0x01, %ah
	jnz	L(exit3)		/* c in lane 2 */
	lea	12(%ecx), %eax		/* c in lane 3 */
	ret

/* c is in lane 0.  */
	.p2align 4
L(exit0):
	mov	%ecx, %eax
	ret

/* c is in lane 2.  */
	.p2align 4
L(exit3):
	lea	8(%ecx), %eax
	ret

/* A zero element was reached before any element equal to c.  */
	.p2align 4
L(return_null):
	xor	%eax, %eax
	ret

END (wcschr)
268