1/*
2Copyright (c) 2011, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
/* Fallback helper macros.  Each one is defined only when nothing earlier
   in the translation unit has already supplied a macro of that name.  */

/* L(label): spell LABEL with a leading ".L".  */
#if !defined(L)
# define L(label)	.L##label
#endif

/* cfi_* spellings that forward to the assembler's .cfi_* directives.  */
#if !defined(cfi_startproc)
# define cfi_startproc	.cfi_startproc
#endif

#if !defined(cfi_endproc)
# define cfi_endproc	.cfi_endproc
#endif

#if !defined(cfi_rel_offset)
# define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
#endif

#if !defined(cfi_restore)
# define cfi_restore(reg)	.cfi_restore reg
#endif

#if !defined(cfi_adjust_cfa_offset)
# define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
#endif

/* ENTRY(name): make NAME a global function symbol, align it with
   .p2align 4, emit the label and open its CFI region.  */
#if !defined(ENTRY)
# define ENTRY(name)	\
	.globl name;	\
	.type name,  @function;	\
	.p2align 4;	\
name:	\
	cfi_startproc
#endif

/* END(name): close the CFI region opened by ENTRY and record the size
   of NAME as the distance from its label to this point.  */
#if !defined(END)
# define END(name)	\
	cfi_endproc;	\
	.size name,	.-name
#endif
69
/* Argument offsets relative to %esp once ENTRANCE has pushed %edi:
   4 bytes of return address plus 4 bytes of saved register put the
   first argument slot at 8(%esp) and the second one 4 bytes above.  */
#define PARMS	8
#define STR1	PARMS
#define STR2	STR1+4

/* Unwind bookkeeping for a 4-byte push of REG: the CFA moves by 4 and
   REG is recorded as saved at offset 0 from the stack pointer.  */
#define CFI_PUSH(REG)	\
	cfi_adjust_cfa_offset (4);	\
	cfi_rel_offset (REG, 0)

/* Unwind bookkeeping for the matching 4-byte pop of REG.  */
#define CFI_POP(REG)	\
	cfi_adjust_cfa_offset (-4);	\
	cfi_restore (REG)

/* Push / pop a register together with its unwind annotation.  */
#define PUSH(REG)	pushl REG;	CFI_PUSH (REG)
#define POP(REG)	popl REG;	CFI_POP (REG)

/* Function prologue: save %edi.  */
#define ENTRANCE	PUSH(%edi)

/* Function epilogue: restore %edi and return.  The trailing CFI_PUSH
   re-applies the "%edi is saved" unwind state for whatever code is
   placed after this return.  */
#define RETURN	POP (%edi); ret; CFI_PUSH (%edi);
88
89	.text
90ENTRY (strchr)
91
92	ENTRANCE
93	mov	STR1(%esp), %ecx
94	movd	STR2(%esp), %xmm1
95
96	pxor	%xmm2, %xmm2
97	mov	%ecx, %edi
98	punpcklbw %xmm1, %xmm1
99	punpcklbw %xmm1, %xmm1
100	/* ECX has OFFSET. */
101	and	$15, %ecx
102	pshufd	$0, %xmm1, %xmm1
103	je	L(loop)
104
105/* Handle unaligned string.  */
106	and	$-16, %edi
107	movdqa	(%edi), %xmm0
108	pcmpeqb	%xmm0, %xmm2
109	pcmpeqb	%xmm1, %xmm0
110	/* Find where NULL is.  */
111	pmovmskb %xmm2, %edx
112	/* Check if there is a match.  */
113	pmovmskb %xmm0, %eax
114	/* Remove the leading bytes.  */
115	sarl	%cl, %edx
116	sarl	%cl, %eax
117	test	%eax, %eax
118	jz	L(unaligned_no_match)
119	add	%ecx, %edi
120	test	%edx, %edx
121	jz	L(match_case1)
122	jmp	L(match_case2)
123
124	.p2align 4
125L(unaligned_no_match):
126	test	%edx, %edx
127	jne	L(return_null)
128
129	pxor	%xmm2, %xmm2
130	add	$16, %edi
131
132	.p2align 4
133/* Loop start on aligned string.  */
134L(loop):
135	movdqa	(%edi), %xmm0
136	pcmpeqb	%xmm0, %xmm2
137	pcmpeqb	%xmm1, %xmm0
138	pmovmskb %xmm2, %edx
139	pmovmskb %xmm0, %eax
140	test	%eax, %eax
141	jnz	L(matches)
142	test	%edx, %edx
143	jnz	L(return_null)
144	add	$16, %edi
145
146	movdqa	(%edi), %xmm0
147	pcmpeqb	%xmm0, %xmm2
148	pcmpeqb	%xmm1, %xmm0
149	pmovmskb %xmm2, %edx
150	pmovmskb %xmm0, %eax
151	test	%eax, %eax
152	jnz	L(matches)
153	test	%edx, %edx
154	jnz	L(return_null)
155	add	$16, %edi
156
157	movdqa	(%edi), %xmm0
158	pcmpeqb	%xmm0, %xmm2
159	pcmpeqb	%xmm1, %xmm0
160	pmovmskb %xmm2, %edx
161	pmovmskb %xmm0, %eax
162	test	%eax, %eax
163	jnz	L(matches)
164	test	%edx, %edx
165	jnz	L(return_null)
166	add	$16, %edi
167
168	movdqa	(%edi), %xmm0
169	pcmpeqb	%xmm0, %xmm2
170	pcmpeqb	%xmm1, %xmm0
171	pmovmskb %xmm2, %edx
172	pmovmskb %xmm0, %eax
173	test	%eax, %eax
174	jnz	L(matches)
175	test	%edx, %edx
176	jnz	L(return_null)
177	add	$16, %edi
178	jmp	L(loop)
179
180L(matches):
181	/* There is a match.  First find where NULL is.  */
182	test	%edx, %edx
183	jz	L(match_case1)
184
185	.p2align 4
186L(match_case2):
187	test	%al, %al
188	jz	L(match_higth_case2)
189
190	mov	%al, %cl
191	and	$15, %cl
192	jnz	L(match_case2_4)
193
194	mov	%dl, %ch
195	and	$15, %ch
196	jnz	L(return_null)
197
198	test	$0x10, %al
199	jnz	L(Exit5)
200	test	$0x10, %dl
201	jnz	L(return_null)
202	test	$0x20, %al
203	jnz	L(Exit6)
204	test	$0x20, %dl
205	jnz	L(return_null)
206	test	$0x40, %al
207	jnz	L(Exit7)
208	test	$0x40, %dl
209	jnz	L(return_null)
210	lea	7(%edi), %eax
211	RETURN
212
213	.p2align 4
214L(match_case2_4):
215	test	$0x01, %al
216	jnz	L(Exit1)
217	test	$0x01, %dl
218	jnz	L(return_null)
219	test	$0x02, %al
220	jnz	L(Exit2)
221	test	$0x02, %dl
222	jnz	L(return_null)
223	test	$0x04, %al
224	jnz	L(Exit3)
225	test	$0x04, %dl
226	jnz	L(return_null)
227	lea	3(%edi), %eax
228	RETURN
229
230	.p2align 4
231L(match_higth_case2):
232	test	%dl, %dl
233	jnz	L(return_null)
234
235	mov	%ah, %cl
236	and	$15, %cl
237	jnz	L(match_case2_12)
238
239	mov	%dh, %ch
240	and	$15, %ch
241	jnz	L(return_null)
242
243	test	$0x10, %ah
244	jnz	L(Exit13)
245	test	$0x10, %dh
246	jnz	L(return_null)
247	test	$0x20, %ah
248	jnz	L(Exit14)
249	test	$0x20, %dh
250	jnz	L(return_null)
251	test	$0x40, %ah
252	jnz	L(Exit15)
253	test	$0x40, %dh
254	jnz	L(return_null)
255	lea	15(%edi), %eax
256	RETURN
257
258	.p2align 4
259L(match_case2_12):
260	test	$0x01, %ah
261	jnz	L(Exit9)
262	test	$0x01, %dh
263	jnz	L(return_null)
264	test	$0x02, %ah
265	jnz	L(Exit10)
266	test	$0x02, %dh
267	jnz	L(return_null)
268	test	$0x04, %ah
269	jnz	L(Exit11)
270	test	$0x04, %dh
271	jnz	L(return_null)
272	lea	11(%edi), %eax
273	RETURN
274
275	.p2align 4
276L(match_case1):
277	test	%al, %al
278	jz	L(match_higth_case1)
279
280	test	$0x01, %al
281	jnz	L(Exit1)
282	test	$0x02, %al
283	jnz	L(Exit2)
284	test	$0x04, %al
285	jnz	L(Exit3)
286	test	$0x08, %al
287	jnz	L(Exit4)
288	test	$0x10, %al
289	jnz	L(Exit5)
290	test	$0x20, %al
291	jnz	L(Exit6)
292	test	$0x40, %al
293	jnz	L(Exit7)
294	lea	7(%edi), %eax
295	RETURN
296
297	.p2align 4
298L(match_higth_case1):
299	test	$0x01, %ah
300	jnz	L(Exit9)
301	test	$0x02, %ah
302	jnz	L(Exit10)
303	test	$0x04, %ah
304	jnz	L(Exit11)
305	test	$0x08, %ah
306	jnz	L(Exit12)
307	test	$0x10, %ah
308	jnz	L(Exit13)
309	test	$0x20, %ah
310	jnz	L(Exit14)
311	test	$0x40, %ah
312	jnz	L(Exit15)
313	lea	15(%edi), %eax
314	RETURN
315
316	.p2align 4
317L(Exit1):
318	lea	(%edi), %eax
319	RETURN
320
321	.p2align 4
322L(Exit2):
323	lea	1(%edi), %eax
324	RETURN
325
326	.p2align 4
327L(Exit3):
328	lea	2(%edi), %eax
329	RETURN
330
331	.p2align 4
332L(Exit4):
333	lea	3(%edi), %eax
334	RETURN
335
336	.p2align 4
337L(Exit5):
338	lea	4(%edi), %eax
339	RETURN
340
341	.p2align 4
342L(Exit6):
343	lea	5(%edi), %eax
344	RETURN
345
346	.p2align 4
347L(Exit7):
348	lea	6(%edi), %eax
349	RETURN
350
351	.p2align 4
352L(Exit9):
353	lea	8(%edi), %eax
354	RETURN
355
356	.p2align 4
357L(Exit10):
358	lea	9(%edi), %eax
359	RETURN
360
361	.p2align 4
362L(Exit11):
363	lea	10(%edi), %eax
364	RETURN
365
366	.p2align 4
367L(Exit12):
368	lea	11(%edi), %eax
369	RETURN
370
371	.p2align 4
372L(Exit13):
373	lea	12(%edi), %eax
374	RETURN
375
376	.p2align 4
377L(Exit14):
378	lea	13(%edi), %eax
379	RETURN
380
381	.p2align 4
382L(Exit15):
383	lea	14(%edi), %eax
384	RETURN
385
386	.p2align 4
387L(return_null):
388	xor	%eax, %eax
389	RETURN
390
391END (strchr)
392