/*
Copyright (c) 2011 Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#ifndef USE_AS_WCSCAT

/*
 * Standalone build.  Each helper macro below is defined only if the build
 * environment has not already provided it.
 */

/* L(label): spell a label with the assembler's ".L" (non-exported) prefix. */
# ifndef L
#  define L(label)	.L##label
# endif

# ifndef cfi_startproc
#  define cfi_startproc	.cfi_startproc
# endif

# ifndef cfi_endproc
#  define cfi_endproc	.cfi_endproc
# endif

/* ENTRY(name): global function symbol, 16-byte aligned, opens the CFI region. */
# ifndef ENTRY
#  define ENTRY(name)	\
	.type name,  @function;	\
	.globl name;	\
	.p2align 4;	\
name:	\
	cfi_startproc
# endif

/* END(name): closes the CFI region and records the symbol size. */
# ifndef END
#  define END(name)	\
	cfi_endproc;	\
	.size name, .-name
# endif

/* Byte offset from %esp at which the string pointer argument is read. */
# define PARMS	4
# define STR	PARMS
/* How every exit path leaves the routine in the standalone build. */
# define RETURN ret

/*
 * wcslen: count the 4-byte elements that precede the first zero element.
 *
 * In:       pointer to the string, read from STR(%esp)
 * Out:      %eax = number of elements before the terminating zero
 * Clobbers: %eax, %ecx, %edx, %xmm0-%xmm3, %xmm6, flags
 *
 * When USE_AS_WCSCAT is defined, everything inside this conditional is
 * compiled out: the code below then expects the string pointer to be in
 * %edx already, and L() and RETURN must come from the including file.  The
 * trailing RETURN / END are compiled out as well, so L(exit_tail7) falls
 * through into whatever the includer places after this file.
 *
 * NOTE(review): the index arithmetic (byte distance >> 2) assumes the string
 * starts on a 4-byte boundary -- confirm that callers guarantee this.
 *
 * Outline:
 *   1. Test the first 8 elements one by one.
 *   2. Test 16 consecutive 16-byte-aligned blocks, 4 elements per compare.
 *   3. Round down to a 64-byte boundary and scan 64 bytes per iteration.
 */
	.text
ENTRY (wcslen)
	mov	STR(%esp), %edx
#endif
	/* Stage 1: elements 0..7, one 32-bit compare each. */
	cmpl	$0, (%edx)
	jz	L(exit_tail0)
	cmpl	$0, 4(%edx)
	jz	L(exit_tail1)
	cmpl	$0, 8(%edx)
	jz	L(exit_tail2)
	cmpl	$0, 12(%edx)
	jz	L(exit_tail3)
	cmpl	$0, 16(%edx)
	jz	L(exit_tail4)
	cmpl	$0, 20(%edx)
	jz	L(exit_tail5)
	cmpl	$0, 24(%edx)
	jz	L(exit_tail6)
	cmpl	$0, 28(%edx)
	jz	L(exit_tail7)

	/*
	 * Stage 2 setup.
	 *   %ecx = s + 16
	 *   %eax = (s + 32) rounded down to 16 bytes: the block holding element 8
	 * Throughout stage 2, %eax is advanced past a block before the branch to
	 * L(exit), so there %eax - %ecx is the byte offset of that block from s.
	 * The first block may overlap elements already tested; they are non-zero
	 * and therefore cannot produce a match.
	 */
	pxor	%xmm0, %xmm0

	lea	32(%edx), %eax
	lea	-16(%eax), %ecx
	and	$-16, %eax

	/*
	 * Each step: compare four elements against zero, collect the 16-bit byte
	 * mask in %edx (4 mask bits per element), advance %eax, exit on a match.
	 * %xmm1-%xmm3 are zeroed just before their first use.  A compare without
	 * a match leaves its register all-zero, so from the fifth step on the
	 * registers are reused as the zero operand without clearing them again.
	 */
	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	pxor	%xmm1, %xmm1
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	pxor	%xmm2, %xmm2
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	pxor	%xmm3, %xmm3
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm0
	pmovmskb %xmm0, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm1
	pmovmskb %xmm1, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm2
	pmovmskb %xmm2, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	16(%eax), %eax
	test	%edx, %edx
	jnz	L(exit)

	/*
	 * Stage 3: round %eax down to a 64-byte boundary.  This may step back
	 * over blocks that stage 2 already tested; those hold no zero element,
	 * so scanning them again cannot change the result.
	 */
	and	$-0x40, %eax

	.p2align 4
L(aligned_64_loop):
	/* Loop invariants: %xmm3 is all-zero and %ecx = s + 16. */
	movaps	(%eax), %xmm0
	movaps	16(%eax), %xmm1
	movaps	32(%eax), %xmm2
	movaps	48(%eax), %xmm6

	/*
	 * Fold the four blocks into %xmm2 with a byte-wise unsigned minimum and
	 * compare the fold against zero.  %xmm1 and %xmm6 are only read here, so
	 * they still hold blocks 1 and 3 for the re-check below.
	 */
	pminub	%xmm1, %xmm0
	pminub	%xmm6, %xmm2
	pminub	%xmm0, %xmm2
	pcmpeqd	%xmm3, %xmm2
	pmovmskb %xmm2, %edx
	lea	64(%eax), %eax
	test	%edx, %edx
	jz	L(aligned_64_loop)

	/*
	 * The fold is byte-wise, so a zero dword in it can be assembled from
	 * bytes of different blocks without any block holding a zero element.
	 * Re-check each block exactly.  %eax is now 64 bytes past the group, so
	 * %ecx is raised by 48 for block 0 and lowered by 16 for each later
	 * block to keep %eax - %ecx equal to the byte offset of the block under
	 * test.  Every compare that finds nothing leaves %xmm3 all-zero again.
	 */
	pcmpeqd	-64(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	48(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm1, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	-32(%eax), %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	pcmpeqd	%xmm6, %xmm3
	pmovmskb %xmm3, %edx
	lea	-16(%ecx), %ecx
	test	%edx, %edx
	jnz	L(exit)

	/* False positive: %ecx is back at s + 16 and %xmm3 is zero; keep going. */
	jmp	L(aligned_64_loop)

	/*
	 * Common exit for the SSE paths.
	 *   %eax - %ecx = byte offset of the matching block from s
	 *   %edx        = byte mask of that block:
	 *                 %dl holds elements 0 (low nibble) and 1 (high nibble),
	 *                 %dh holds elements 2 (low nibble) and 3 (high nibble)
	 * The lowest matching element decides how much is added to the index of
	 * the block's first element.
	 */
	.p2align 4
L(exit):
	sub	%ecx, %eax
	shr	$2, %eax		/* bytes -> index of the block's first element */
	test	%dl, %dl
	jz	L(exit_high)		/* no match in elements 0 and 1 */

	test	$15, %dl		/* did element 0 match? */
	jz	L(exit_1)
	RETURN

	.p2align 4
L(exit_high):
	test	$15, %dh		/* did element 2 match? */
	jz	L(exit_3)
	add	$2, %eax
	RETURN

	.p2align 4
L(exit_1):
	add	$1, %eax
	RETURN

	.p2align 4
L(exit_3):
	add	$3, %eax
	RETURN

	/* Stage 1 exits: the result is the index of the zero element found. */
	.p2align 4
L(exit_tail0):
	xor	%eax, %eax
	RETURN

	.p2align 4
L(exit_tail1):
	mov	$1, %eax
	RETURN

	.p2align 4
L(exit_tail2):
	mov	$2, %eax
	RETURN

	.p2align 4
L(exit_tail3):
	mov	$3, %eax
	RETURN

	.p2align 4
L(exit_tail4):
	mov	$4, %eax
	RETURN

	.p2align 4
L(exit_tail5):
	mov	$5, %eax
	RETURN

	.p2align 4
L(exit_tail6):
	mov	$6, %eax
	RETURN

	.p2align 4
L(exit_tail7):
	mov	$7, %eax
	/* With USE_AS_WCSCAT defined, execution falls through past this point. */
#ifndef USE_AS_WCSCAT
	RETURN

END (wcslen)
#endif
307