1/*
2Copyright (c) 2011, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#ifndef USE_AS_STRCAT
32
/* L(label): builds a label name by pasting the ".L" prefix onto `label`,
   so L(Foo) expands to .LFoo.  Only defined here when L is not already
   defined.  */
33# ifndef L
34#  define L(label)	.L##label
35# endif
36
/* Fallback definitions for the cfi_* helper macros.  Each one is defined
   only when it is not already defined, and expands to the assembler
   directive of the same name with a leading dot (.cfi_startproc,
   .cfi_endproc, .cfi_rel_offset, .cfi_restore, .cfi_adjust_cfa_offset),
   passing its arguments through unchanged.  */
37# ifndef cfi_startproc
38#  define cfi_startproc	.cfi_startproc
39# endif
40
41# ifndef cfi_endproc
42#  define cfi_endproc	.cfi_endproc
43# endif
44
45# ifndef cfi_rel_offset
46#  define cfi_rel_offset(reg, off)	.cfi_rel_offset reg, off
47# endif
48
49# ifndef cfi_restore
50#  define cfi_restore(reg)	.cfi_restore reg
51# endif
52
53# ifndef cfi_adjust_cfa_offset
54#  define cfi_adjust_cfa_offset(off)	.cfi_adjust_cfa_offset off
55# endif
56
/* ENTRY(name): opens a function (only defined when ENTRY is not already
   defined).  In order it: marks `name` as a function symbol (.type),
   makes it global (.globl), aligns to 2^4 = 16 bytes (.p2align 4), emits
   the `name:` label, and opens a CFI region with cfi_startproc.  The
   matching cfi_endproc is emitted by END(name).  */
57# ifndef ENTRY
58#  define ENTRY(name)	\
59	.type name, @function;	\
60	.globl name;	\
61	.p2align 4;	\
62name:	\
63	cfi_startproc
64# endif
65
/* END(name): closes a function (only defined when END is not already
   defined).  Ends the CFI region with cfi_endproc and records the symbol
   size as the distance from `name` to the current location (.-name).  */
66# ifndef END
67#  define END(name)	\
68	cfi_endproc;	\
69	.size name, .-name
70# endif
71
/* Stack push/pop helpers that keep the unwind (CFI) description in sync.

   CFI_PUSH(REG) - annotation for a 4-byte push of REG: the CFA offset
		   grows by 4 and REG is recorded as saved at offset 0.
   CFI_POP(REG)  - the inverse: the CFA offset shrinks by 4 and REG is
		   marked as restored.
   PUSH(REG)     - pushl REG followed by CFI_PUSH (REG).
   POP(REG)      - popl REG followed by CFI_POP (REG).

   CFI_PUSH and CFI_POP consist solely of cfi_* macro invocations; with
   the fallback cfi_* definitions in this file those are assembler
   directives, so they emit no instructions.  */
72# define CFI_PUSH(REG)	\
73	cfi_adjust_cfa_offset (4);	\
74	cfi_rel_offset (REG, 0)
75
76# define CFI_POP(REG)	\
77	cfi_adjust_cfa_offset (-4);	\
78	cfi_restore (REG)
79
80# define PUSH(REG)	pushl REG; CFI_PUSH (REG)
81# define POP(REG)	popl REG; CFI_POP (REG)
82
/* STRCPY is the symbol name passed to ENTRY () for the function defined
   in this file.  It defaults to strcpy_atom when STRCPY has not been
   defined before this point.  */
83# ifndef STRCPY
84#  define STRCPY  strcpy_atom
85# endif
86
/* Per-variant prologue/epilogue pieces.

   PARMS    - %esp-relative offset of the first argument once ENTRANCE
	      has run (see STR1): 8 when ENTRANCE pushes %ebx, 4 when
	      ENTRANCE is empty.
   ENTRANCE - emitted at function entry.  With USE_AS_STRNCPY it saves
	      %ebx, which the entry code then loads from LEN(%esp).
   RETURN   - undoes ENTRANCE (pops %ebx if it was pushed) and returns.
   RETURN1  - like RETURN, but pops %edi first.

   The CFI_PUSH annotations placed after each `ret` emit no instructions.
   NOTE(review): presumably they re-declare the registers as still saved
   for the code assembled after the `ret` -- confirm.  */
87# ifdef USE_AS_STRNCPY
88#  define PARMS  8
89#  define ENTRANCE PUSH (%ebx)
90#  define RETURN  POP (%ebx); ret; CFI_PUSH (%ebx);
91#  define RETURN1  POP (%edi); POP (%ebx); ret; CFI_PUSH (%ebx); CFI_PUSH (%edi)
92# else
93#  define PARMS  4
94#  define ENTRANCE
95#  define RETURN  ret
96#  define RETURN1  POP (%edi); ret; CFI_PUSH (%edi)
97# endif
98
/* SAVE_RESULT(n) / SAVE_RESULT_TAIL(n): load %eax.

   With USE_AS_STPCPY both compute %eax = %edx + n.
   Otherwise `n` is ignored: SAVE_RESULT copies %edi into %eax and
   SAVE_RESULT_TAIL copies %edx into %eax.

   NOTE(review): presumably %eax is being set up as the function's return
   value -- confirm at the use sites.  */
99# ifdef USE_AS_STPCPY
100#  define SAVE_RESULT(n)  lea	n(%edx), %eax
101#  define SAVE_RESULT_TAIL(n)  lea	n(%edx), %eax
102# else
103#  define SAVE_RESULT(n)  movl	%edi, %eax
104#  define SAVE_RESULT_TAIL(n)  movl	%edx, %eax
105# endif
106
/* %esp-relative offsets of the arguments, built on PARMS and laid out 4
   bytes apart.  The entry code loads STR1(%esp) into %edx, STR2(%esp)
   into %ecx and, with USE_AS_STRNCPY, LEN(%esp) into %ebx.  The
   definitions are not parenthesized; the entry code uses them only as
   displacements, e.g. STR2(%esp).  */
107# define STR1  PARMS
108# define STR2  STR1+4
109# define LEN  STR2+4
110
111/* In this code the following instructions are used for copying:
112	movb	- 1 byte
113	movw	- 2 bytes
114	movl	- 4 bytes
115	movlpd	- 8 bytes
116	movaps	- 16 bytes - requires 16-byte alignment
117	of	source and destination addresses.
118*/
119
120.text
121ENTRY (STRCPY)
122	ENTRANCE
123	mov	STR1(%esp), %edx
124	mov	STR2(%esp), %ecx
125# ifdef USE_AS_STRNCPY
126	movl	LEN(%esp), %ebx
127	cmp	$8, %ebx
128	jbe	L(StrncpyExit8Bytes)
129# endif
130	cmpb	$0, (%ecx)
131	jz	L(ExitTail1)
132	cmpb	$0, 1(%ecx)
133	jz	L(ExitTail2)
134	cmpb	$0, 2(%ecx)
135	jz	L(ExitTail3)
136	cmpb	$0, 3(%ecx)
137	jz	L(ExitTail4)
138	cmpb	$0, 4(%ecx)
139	jz	L(ExitTail5)
140	cmpb	$0, 5(%ecx)
141	jz	L(ExitTail6)
142	cmpb	$0, 6(%ecx)
143	jz	L(ExitTail7)
144	cmpb	$0, 7(%ecx)
145	jz	L(ExitTail8)
146# ifdef USE_AS_STRNCPY
147	cmp	$16, %ebx
148	jb	L(StrncpyExit15Bytes)
149# endif
150	cmpb	$0, 8(%ecx)
151	jz	L(ExitTail9)
152	cmpb	$0, 9(%ecx)
153	jz	L(ExitTail10)
154	cmpb	$0, 10(%ecx)
155	jz	L(ExitTail11)
156	cmpb	$0, 11(%ecx)
157	jz	L(ExitTail12)
158	cmpb	$0, 12(%ecx)
159	jz	L(ExitTail13)
160	cmpb	$0, 13(%ecx)
161	jz	L(ExitTail14)
162	cmpb	$0, 14(%ecx)
163	jz	L(ExitTail15)
164# if defined USE_AS_STRNCPY && !defined USE_AS_STRLCPY
165	cmp	$16, %ebx
166	je	L(ExitTail16)
167# endif
168	cmpb	$0, 15(%ecx)
169	jz	L(ExitTail16)
170
171# if defined USE_AS_STRNCPY && defined USE_AS_STRLCPY
172	cmp	$16, %ebx
173	je	L(StrlcpyExitTail16)
174# endif
175
176	PUSH	(%edi)
177# ifndef USE_AS_STRLCPY
178	mov	%edx, %edi
179# else
180	mov	%ecx, %edi
181# endif
182#endif
183	PUSH	(%esi)
184#ifdef USE_AS_STRNCPY
185	mov	%ecx, %esi
186	sub	$16, %ebx
187	and	$0xf, %esi
188
189/* add 16 bytes ecx_offset to ebx */
190
191	add	%esi, %ebx
192#endif
193	lea	16(%ecx), %esi
194	and	$-16, %esi
195	pxor	%xmm0, %xmm0
196	movlpd	(%ecx), %xmm1
197	movlpd	%xmm1, (%edx)
198
199	pcmpeqb	(%esi), %xmm0
200	movlpd	8(%ecx), %xmm1
201	movlpd	%xmm1, 8(%edx)
202
203	pmovmskb %xmm0, %eax
204	sub	%ecx, %esi
205
206#ifdef USE_AS_STRNCPY
207	sub	$16, %ebx
208	jbe	L(CopyFrom1To16BytesCase2OrCase3)
209#endif
210	test	%eax, %eax
211	jnz	L(CopyFrom1To16Bytes)
212
213	mov	%edx, %eax
214	lea	16(%edx), %edx
215	and	$-16, %edx
216	sub	%edx, %eax
217
218#ifdef USE_AS_STRNCPY
219	add	%eax, %esi
220	lea	-1(%esi), %esi
221	and	$1<<31, %esi
222	test	%esi, %esi
223	jnz	L(ContinueCopy)
224	lea	16(%ebx), %ebx
225
226L(ContinueCopy):
227#endif
228	sub	%eax, %ecx
229	mov	%ecx, %eax
230	and	$0xf, %eax
231	mov	$0, %esi
232
233/* case: ecx_offset == edx_offset */
234
235	jz	L(Align16Both)
236
237	cmp	$8, %eax
238	jae	L(ShlHigh8)
239	cmp	$1, %eax
240	je	L(Shl1)
241	cmp	$2, %eax
242	je	L(Shl2)
243	cmp	$3, %eax
244	je	L(Shl3)
245	cmp	$4, %eax
246	je	L(Shl4)
247	cmp	$5, %eax
248	je	L(Shl5)
249	cmp	$6, %eax
250	je	L(Shl6)
251	jmp	L(Shl7)
252
253L(ShlHigh8):
254	je	L(Shl8)
255	cmp	$9, %eax
256	je	L(Shl9)
257	cmp	$10, %eax
258	je	L(Shl10)
259	cmp	$11, %eax
260	je	L(Shl11)
261	cmp	$12, %eax
262	je	L(Shl12)
263	cmp	$13, %eax
264	je	L(Shl13)
265	cmp	$14, %eax
266	je	L(Shl14)
267	jmp	L(Shl15)
268
269L(Align16Both):
270	movaps	(%ecx), %xmm1
271	movaps	16(%ecx), %xmm2
272	movaps	%xmm1, (%edx)
273	pcmpeqb	%xmm2, %xmm0
274	pmovmskb %xmm0, %eax
275	lea	16(%esi), %esi
276#ifdef USE_AS_STRNCPY
277	sub	$16, %ebx
278	jbe	L(CopyFrom1To16BytesCase2OrCase3)
279#endif
280	test	%eax, %eax
281	jnz	L(CopyFrom1To16Bytes)
282
283	movaps	16(%ecx, %esi), %xmm3
284	movaps	%xmm2, (%edx, %esi)
285	pcmpeqb	%xmm3, %xmm0
286	pmovmskb %xmm0, %eax
287	lea	16(%esi), %esi
288#ifdef USE_AS_STRNCPY
289	sub	$16, %ebx
290	jbe	L(CopyFrom1To16BytesCase2OrCase3)
291#endif
292	test	%eax, %eax
293	jnz	L(CopyFrom1To16Bytes)
294
295	movaps	16(%ecx, %esi), %xmm4
296	movaps	%xmm3, (%edx, %esi)
297	pcmpeqb	%xmm4, %xmm0
298	pmovmskb %xmm0, %eax
299	lea	16(%esi), %esi
300#ifdef USE_AS_STRNCPY
301	sub	$16, %ebx
302	jbe	L(CopyFrom1To16BytesCase2OrCase3)
303#endif
304	test	%eax, %eax
305	jnz	L(CopyFrom1To16Bytes)
306
307	movaps	16(%ecx, %esi), %xmm1
308	movaps	%xmm4, (%edx, %esi)
309	pcmpeqb	%xmm1, %xmm0
310	pmovmskb %xmm0, %eax
311	lea	16(%esi), %esi
312#ifdef USE_AS_STRNCPY
313	sub	$16, %ebx
314	jbe	L(CopyFrom1To16BytesCase2OrCase3)
315#endif
316	test	%eax, %eax
317	jnz	L(CopyFrom1To16Bytes)
318
319	movaps	16(%ecx, %esi), %xmm2
320	movaps	%xmm1, (%edx, %esi)
321	pcmpeqb	%xmm2, %xmm0
322	pmovmskb %xmm0, %eax
323	lea	16(%esi), %esi
324#ifdef USE_AS_STRNCPY
325	sub	$16, %ebx
326	jbe	L(CopyFrom1To16BytesCase2OrCase3)
327#endif
328	test	%eax, %eax
329	jnz	L(CopyFrom1To16Bytes)
330
331	movaps	16(%ecx, %esi), %xmm3
332	movaps	%xmm2, (%edx, %esi)
333	pcmpeqb	%xmm3, %xmm0
334	pmovmskb %xmm0, %eax
335	lea	16(%esi), %esi
336#ifdef USE_AS_STRNCPY
337	sub	$16, %ebx
338	jbe	L(CopyFrom1To16BytesCase2OrCase3)
339#endif
340	test	%eax, %eax
341	jnz	L(CopyFrom1To16Bytes)
342
343	movaps	%xmm3, (%edx, %esi)
344	mov	%ecx, %eax
345	lea	16(%ecx, %esi), %ecx
346	and	$-0x40, %ecx
347	sub	%ecx, %eax
348	sub	%eax, %edx
349#ifdef USE_AS_STRNCPY
350	lea	112(%ebx, %eax), %ebx
351#endif
352	mov	$-0x40, %esi
353
354L(Aligned64Loop):
355	movaps	(%ecx), %xmm2
356	movaps	32(%ecx), %xmm3
357	movaps	%xmm2, %xmm4
358	movaps	16(%ecx), %xmm5
359	movaps	%xmm3, %xmm6
360	movaps	48(%ecx), %xmm7
361	pminub	%xmm5, %xmm2
362	pminub	%xmm7, %xmm3
363	pminub	%xmm2, %xmm3
364	lea	64(%edx), %edx
365	pcmpeqb	%xmm0, %xmm3
366	lea	64(%ecx), %ecx
367	pmovmskb %xmm3, %eax
368#ifdef USE_AS_STRNCPY
369	sub	$64, %ebx
370	jbe	L(StrncpyLeaveCase2OrCase3)
371#endif
372	test	%eax, %eax
373	jnz	L(Aligned64Leave)
374	movaps	%xmm4, -64(%edx)
375	movaps	%xmm5, -48(%edx)
376	movaps	%xmm6, -32(%edx)
377	movaps	%xmm7, -16(%edx)
378	jmp	L(Aligned64Loop)
379
380L(Aligned64Leave):
381#ifdef USE_AS_STRNCPY
382	lea	48(%ebx), %ebx
383#endif
384	pcmpeqb	%xmm4, %xmm0
385	pmovmskb %xmm0, %eax
386	test	%eax, %eax
387	jnz	L(CopyFrom1To16Bytes)
388
389	pcmpeqb	%xmm5, %xmm0
390#ifdef USE_AS_STRNCPY
391	lea	-16(%ebx), %ebx
392#endif
393	pmovmskb %xmm0, %eax
394	movaps	%xmm4, -64(%edx)
395	lea	16(%esi), %esi
396	test	%eax, %eax
397	jnz	L(CopyFrom1To16Bytes)
398
399	pcmpeqb	%xmm6, %xmm0
400#ifdef USE_AS_STRNCPY
401	lea	-16(%ebx), %ebx
402#endif
403	pmovmskb %xmm0, %eax
404	movaps	%xmm5, -48(%edx)
405	lea	16(%esi), %esi
406	test	%eax, %eax
407	jnz	L(CopyFrom1To16Bytes)
408
409	movaps	%xmm6, -32(%edx)
410	pcmpeqb	%xmm7, %xmm0
411#ifdef USE_AS_STRNCPY
412	lea	-16(%ebx), %ebx
413#endif
414	pmovmskb %xmm0, %eax
415	lea	16(%esi), %esi
416	jmp	L(CopyFrom1To16Bytes)
417
418	.p2align 4
419L(Shl1):
420	movaps	-1(%ecx), %xmm1
421	movaps	15(%ecx), %xmm2
422L(Shl1Start):
423	pcmpeqb	%xmm2, %xmm0
424	pmovmskb %xmm0, %eax
425	movaps	%xmm2, %xmm3
426#ifdef USE_AS_STRNCPY
427	sub	$16, %ebx
428	jbe	L(StrncpyExit1Case2OrCase3)
429#endif
430	test	%eax, %eax
431	jnz	L(Shl1LoopExit)
432
433	palignr	$1, %xmm1, %xmm2
434	movaps	%xmm3, %xmm1
435	movaps	%xmm2, (%edx)
436	movaps	31(%ecx), %xmm2
437
438	pcmpeqb	%xmm2, %xmm0
439	lea	16(%edx), %edx
440	pmovmskb %xmm0, %eax
441	lea	16(%ecx), %ecx
442	movaps	%xmm2, %xmm3
443#ifdef USE_AS_STRNCPY
444	sub	$16, %ebx
445	jbe	L(StrncpyExit1Case2OrCase3)
446#endif
447	test	%eax, %eax
448	jnz	L(Shl1LoopExit)
449
450	palignr	$1, %xmm1, %xmm2
451	movaps	%xmm2, (%edx)
452	movaps	31(%ecx), %xmm2
453	movaps	%xmm3, %xmm1
454
455	pcmpeqb	%xmm2, %xmm0
456	lea	16(%edx), %edx
457	pmovmskb %xmm0, %eax
458	lea	16(%ecx), %ecx
459	movaps	%xmm2, %xmm3
460#ifdef USE_AS_STRNCPY
461	sub	$16, %ebx
462	jbe	L(StrncpyExit1Case2OrCase3)
463#endif
464	test	%eax, %eax
465	jnz	L(Shl1LoopExit)
466
467	palignr	$1, %xmm1, %xmm2
468	movaps	%xmm2, (%edx)
469	movaps	31(%ecx), %xmm2
470
471	pcmpeqb	%xmm2, %xmm0
472	lea	16(%edx), %edx
473	pmovmskb %xmm0, %eax
474	lea	16(%ecx), %ecx
475#ifdef USE_AS_STRNCPY
476	sub	$16, %ebx
477	jbe	L(StrncpyExit1Case2OrCase3)
478#endif
479	test	%eax, %eax
480	jnz	L(Shl1LoopExit)
481
482	palignr	$1, %xmm3, %xmm2
483	movaps	%xmm2, (%edx)
484	lea	31(%ecx), %ecx
485	lea	16(%edx), %edx
486
487	mov	%ecx, %eax
488	and	$-0x40, %ecx
489	sub	%ecx, %eax
490	lea	-15(%ecx), %ecx
491	sub	%eax, %edx
492#ifdef USE_AS_STRNCPY
493	add	%eax, %ebx
494#endif
495	movaps	-1(%ecx), %xmm1
496
497L(Shl1LoopStart):
498	movaps	15(%ecx), %xmm2
499	movaps	31(%ecx), %xmm3
500	movaps	%xmm3, %xmm6
501	movaps	47(%ecx), %xmm4
502	movaps	%xmm4, %xmm7
503	movaps	63(%ecx), %xmm5
504	pminub	%xmm2, %xmm6
505	pminub	%xmm5, %xmm7
506	pminub	%xmm6, %xmm7
507	pcmpeqb	%xmm0, %xmm7
508	pmovmskb %xmm7, %eax
509	movaps	%xmm5, %xmm7
510	palignr	$1, %xmm4, %xmm5
511	palignr	$1, %xmm3, %xmm4
512	test	%eax, %eax
513	jnz	L(Shl1Start)
514#ifdef USE_AS_STRNCPY
515	sub	$64, %ebx
516	jbe	L(StrncpyLeave1)
517#endif
518	palignr	$1, %xmm2, %xmm3
519	lea	64(%ecx), %ecx
520	palignr	$1, %xmm1, %xmm2
521	movaps	%xmm7, %xmm1
522	movaps	%xmm5, 48(%edx)
523	movaps	%xmm4, 32(%edx)
524	movaps	%xmm3, 16(%edx)
525	movaps	%xmm2, (%edx)
526	lea	64(%edx), %edx
527	jmp	L(Shl1LoopStart)
528
529L(Shl1LoopExit):
530	movlpd	(%ecx), %xmm0
531	movlpd	%xmm0, (%edx)
532	movlpd	7(%ecx), %xmm0
533	movlpd	%xmm0, 7(%edx)
534	mov	$15, %esi
535	jmp	L(CopyFrom1To16Bytes)
536
537	.p2align 4
538L(Shl2):
539	movaps	-2(%ecx), %xmm1
540	movaps	14(%ecx), %xmm2
541L(Shl2Start):
542	pcmpeqb	%xmm2, %xmm0
543	pmovmskb %xmm0, %eax
544	movaps	%xmm2, %xmm3
545#ifdef USE_AS_STRNCPY
546	sub	$16, %ebx
547	jbe	L(StrncpyExit2Case2OrCase3)
548#endif
549	test	%eax, %eax
550	jnz	L(Shl2LoopExit)
551
552	palignr	$2, %xmm1, %xmm2
553	movaps	%xmm3, %xmm1
554	movaps	%xmm2, (%edx)
555	movaps	30(%ecx), %xmm2
556
557	pcmpeqb	%xmm2, %xmm0
558	lea	16(%edx), %edx
559	pmovmskb %xmm0, %eax
560	lea	16(%ecx), %ecx
561	movaps	%xmm2, %xmm3
562#ifdef USE_AS_STRNCPY
563	sub	$16, %ebx
564	jbe	L(StrncpyExit2Case2OrCase3)
565#endif
566	test	%eax, %eax
567	jnz	L(Shl2LoopExit)
568
569	palignr	$2, %xmm1, %xmm2
570	movaps	%xmm2, (%edx)
571	movaps	30(%ecx), %xmm2
572	movaps	%xmm3, %xmm1
573
574	pcmpeqb	%xmm2, %xmm0
575	lea	16(%edx), %edx
576	pmovmskb %xmm0, %eax
577	lea	16(%ecx), %ecx
578	movaps	%xmm2, %xmm3
579#ifdef USE_AS_STRNCPY
580	sub	$16, %ebx
581	jbe	L(StrncpyExit2Case2OrCase3)
582#endif
583	test	%eax, %eax
584	jnz	L(Shl2LoopExit)
585
586	palignr	$2, %xmm1, %xmm2
587	movaps	%xmm2, (%edx)
588	movaps	30(%ecx), %xmm2
589
590	pcmpeqb	%xmm2, %xmm0
591	lea	16(%edx), %edx
592	pmovmskb %xmm0, %eax
593	lea	16(%ecx), %ecx
594#ifdef USE_AS_STRNCPY
595	sub	$16, %ebx
596	jbe	L(StrncpyExit2Case2OrCase3)
597#endif
598	test	%eax, %eax
599	jnz	L(Shl2LoopExit)
600
601	palignr	$2, %xmm3, %xmm2
602	movaps	%xmm2, (%edx)
603	lea	30(%ecx), %ecx
604	lea	16(%edx), %edx
605
606	mov	%ecx, %eax
607	and	$-0x40, %ecx
608	sub	%ecx, %eax
609	lea	-14(%ecx), %ecx
610	sub	%eax, %edx
611#ifdef USE_AS_STRNCPY
612	add	%eax, %ebx
613#endif
614	movaps	-2(%ecx), %xmm1
615
616L(Shl2LoopStart):
617	movaps	14(%ecx), %xmm2
618	movaps	30(%ecx), %xmm3
619	movaps	%xmm3, %xmm6
620	movaps	46(%ecx), %xmm4
621	movaps	%xmm4, %xmm7
622	movaps	62(%ecx), %xmm5
623	pminub	%xmm2, %xmm6
624	pminub	%xmm5, %xmm7
625	pminub	%xmm6, %xmm7
626	pcmpeqb	%xmm0, %xmm7
627	pmovmskb %xmm7, %eax
628	movaps	%xmm5, %xmm7
629	palignr	$2, %xmm4, %xmm5
630	palignr	$2, %xmm3, %xmm4
631	test	%eax, %eax
632	jnz	L(Shl2Start)
633#ifdef USE_AS_STRNCPY
634	sub	$64, %ebx
635	jbe	L(StrncpyLeave2)
636#endif
637	palignr	$2, %xmm2, %xmm3
638	lea	64(%ecx), %ecx
639	palignr	$2, %xmm1, %xmm2
640	movaps	%xmm7, %xmm1
641	movaps	%xmm5, 48(%edx)
642	movaps	%xmm4, 32(%edx)
643	movaps	%xmm3, 16(%edx)
644	movaps	%xmm2, (%edx)
645	lea	64(%edx), %edx
646	jmp	L(Shl2LoopStart)
647
648L(Shl2LoopExit):
649	movlpd	(%ecx), %xmm0
650	movlpd	6(%ecx), %xmm1
651	movlpd	%xmm0, (%edx)
652	movlpd	%xmm1, 6(%edx)
653	mov	$14, %esi
654	jmp	L(CopyFrom1To16Bytes)
655
656	.p2align 4
657L(Shl3):
658	movaps	-3(%ecx), %xmm1
659	movaps	13(%ecx), %xmm2
660L(Shl3Start):
661	pcmpeqb	%xmm2, %xmm0
662	pmovmskb %xmm0, %eax
663	movaps	%xmm2, %xmm3
664#ifdef USE_AS_STRNCPY
665	sub	$16, %ebx
666	jbe	L(StrncpyExit3Case2OrCase3)
667#endif
668	test	%eax, %eax
669	jnz	L(Shl3LoopExit)
670
671	palignr	$3, %xmm1, %xmm2
672	movaps	%xmm3, %xmm1
673	movaps	%xmm2, (%edx)
674	movaps	29(%ecx), %xmm2
675
676	pcmpeqb	%xmm2, %xmm0
677	lea	16(%edx), %edx
678	pmovmskb %xmm0, %eax
679	lea	16(%ecx), %ecx
680	movaps	%xmm2, %xmm3
681#ifdef USE_AS_STRNCPY
682	sub	$16, %ebx
683	jbe	L(StrncpyExit3Case2OrCase3)
684#endif
685	test	%eax, %eax
686	jnz	L(Shl3LoopExit)
687
688	palignr	$3, %xmm1, %xmm2
689	movaps	%xmm2, (%edx)
690	movaps	29(%ecx), %xmm2
691	movaps	%xmm3, %xmm1
692
693	pcmpeqb	%xmm2, %xmm0
694	lea	16(%edx), %edx
695	pmovmskb %xmm0, %eax
696	lea	16(%ecx), %ecx
697	movaps	%xmm2, %xmm3
698#ifdef USE_AS_STRNCPY
699	sub	$16, %ebx
700	jbe	L(StrncpyExit3Case2OrCase3)
701#endif
702	test	%eax, %eax
703	jnz	L(Shl3LoopExit)
704
705	palignr	$3, %xmm1, %xmm2
706	movaps	%xmm2, (%edx)
707	movaps	29(%ecx), %xmm2
708
709	pcmpeqb	%xmm2, %xmm0
710	lea	16(%edx), %edx
711	pmovmskb %xmm0, %eax
712	lea	16(%ecx), %ecx
713#ifdef USE_AS_STRNCPY
714	sub	$16, %ebx
715	jbe	L(StrncpyExit3Case2OrCase3)
716#endif
717	test	%eax, %eax
718	jnz	L(Shl3LoopExit)
719
720	palignr	$3, %xmm3, %xmm2
721	movaps	%xmm2, (%edx)
722	lea	29(%ecx), %ecx
723	lea	16(%edx), %edx
724
725	mov	%ecx, %eax
726	and	$-0x40, %ecx
727	sub	%ecx, %eax
728	lea	-13(%ecx), %ecx
729	sub	%eax, %edx
730#ifdef USE_AS_STRNCPY
731	add	%eax, %ebx
732#endif
733	movaps	-3(%ecx), %xmm1
734
735L(Shl3LoopStart):
736	movaps	13(%ecx), %xmm2
737	movaps	29(%ecx), %xmm3
738	movaps	%xmm3, %xmm6
739	movaps	45(%ecx), %xmm4
740	movaps	%xmm4, %xmm7
741	movaps	61(%ecx), %xmm5
742	pminub	%xmm2, %xmm6
743	pminub	%xmm5, %xmm7
744	pminub	%xmm6, %xmm7
745	pcmpeqb	%xmm0, %xmm7
746	pmovmskb %xmm7, %eax
747	movaps	%xmm5, %xmm7
748	palignr	$3, %xmm4, %xmm5
749	palignr	$3, %xmm3, %xmm4
750	test	%eax, %eax
751	jnz	L(Shl3Start)
752#ifdef USE_AS_STRNCPY
753	sub	$64, %ebx
754	jbe	L(StrncpyLeave3)
755#endif
756	palignr	$3, %xmm2, %xmm3
757	lea	64(%ecx), %ecx
758	palignr	$3, %xmm1, %xmm2
759	movaps	%xmm7, %xmm1
760	movaps	%xmm5, 48(%edx)
761	movaps	%xmm4, 32(%edx)
762	movaps	%xmm3, 16(%edx)
763	movaps	%xmm2, (%edx)
764	lea	64(%edx), %edx
765	jmp	L(Shl3LoopStart)
766
767L(Shl3LoopExit):
768	movlpd	(%ecx), %xmm0
769	movlpd	5(%ecx), %xmm1
770	movlpd	%xmm0, (%edx)
771	movlpd	%xmm1, 5(%edx)
772	mov	$13, %esi
773	jmp	L(CopyFrom1To16Bytes)
774
775	.p2align 4
776L(Shl4):
777	movaps	-4(%ecx), %xmm1
778	movaps	12(%ecx), %xmm2
779L(Shl4Start):
780	pcmpeqb	%xmm2, %xmm0
781	pmovmskb %xmm0, %eax
782	movaps	%xmm2, %xmm3
783#ifdef USE_AS_STRNCPY
784	sub	$16, %ebx
785	jbe	L(StrncpyExit4Case2OrCase3)
786#endif
787	test	%eax, %eax
788	jnz	L(Shl4LoopExit)
789
790	palignr	$4, %xmm1, %xmm2
791	movaps	%xmm3, %xmm1
792	movaps	%xmm2, (%edx)
793	movaps	28(%ecx), %xmm2
794
795	pcmpeqb	%xmm2, %xmm0
796	lea	16(%edx), %edx
797	pmovmskb %xmm0, %eax
798	lea	16(%ecx), %ecx
799	movaps	%xmm2, %xmm3
800#ifdef USE_AS_STRNCPY
801	sub	$16, %ebx
802	jbe	L(StrncpyExit4Case2OrCase3)
803#endif
804	test	%eax, %eax
805	jnz	L(Shl4LoopExit)
806
807	palignr	$4, %xmm1, %xmm2
808	movaps	%xmm2, (%edx)
809	movaps	28(%ecx), %xmm2
810	movaps	%xmm3, %xmm1
811
812	pcmpeqb	%xmm2, %xmm0
813	lea	16(%edx), %edx
814	pmovmskb %xmm0, %eax
815	lea	16(%ecx), %ecx
816	movaps	%xmm2, %xmm3
817#ifdef USE_AS_STRNCPY
818	sub	$16, %ebx
819	jbe	L(StrncpyExit4Case2OrCase3)
820#endif
821	test	%eax, %eax
822	jnz	L(Shl4LoopExit)
823
824	palignr	$4, %xmm1, %xmm2
825	movaps	%xmm2, (%edx)
826	movaps	28(%ecx), %xmm2
827
828	pcmpeqb	%xmm2, %xmm0
829	lea	16(%edx), %edx
830	pmovmskb %xmm0, %eax
831	lea	16(%ecx), %ecx
832#ifdef USE_AS_STRNCPY
833	sub	$16, %ebx
834	jbe	L(StrncpyExit4Case2OrCase3)
835#endif
836	test	%eax, %eax
837	jnz	L(Shl4LoopExit)
838
839	palignr	$4, %xmm3, %xmm2
840	movaps	%xmm2, (%edx)
841	lea	28(%ecx), %ecx
842	lea	16(%edx), %edx
843
844	mov	%ecx, %eax
845	and	$-0x40, %ecx
846	sub	%ecx, %eax
847	lea	-12(%ecx), %ecx
848	sub	%eax, %edx
849#ifdef USE_AS_STRNCPY
850	add	%eax, %ebx
851#endif
852	movaps	-4(%ecx), %xmm1
853
854L(Shl4LoopStart):
855	movaps	12(%ecx), %xmm2
856	movaps	28(%ecx), %xmm3
857	movaps	%xmm3, %xmm6
858	movaps	44(%ecx), %xmm4
859	movaps	%xmm4, %xmm7
860	movaps	60(%ecx), %xmm5
861	pminub	%xmm2, %xmm6
862	pminub	%xmm5, %xmm7
863	pminub	%xmm6, %xmm7
864	pcmpeqb	%xmm0, %xmm7
865	pmovmskb %xmm7, %eax
866	movaps	%xmm5, %xmm7
867	palignr	$4, %xmm4, %xmm5
868	palignr	$4, %xmm3, %xmm4
869	test	%eax, %eax
870	jnz	L(Shl4Start)
871#ifdef USE_AS_STRNCPY
872	sub	$64, %ebx
873	jbe	L(StrncpyLeave4)
874#endif
875	palignr	$4, %xmm2, %xmm3
876	lea	64(%ecx), %ecx
877	palignr	$4, %xmm1, %xmm2
878	movaps	%xmm7, %xmm1
879	movaps	%xmm5, 48(%edx)
880	movaps	%xmm4, 32(%edx)
881	movaps	%xmm3, 16(%edx)
882	movaps	%xmm2, (%edx)
883	lea	64(%edx), %edx
884	jmp	L(Shl4LoopStart)
885
886L(Shl4LoopExit):
887	movlpd	(%ecx), %xmm0
888	movl	8(%ecx), %esi
889	movlpd	%xmm0, (%edx)
890	movl	%esi, 8(%edx)
891	mov	$12, %esi
892	jmp	L(CopyFrom1To16Bytes)
893
894	.p2align 4
895L(Shl5):
896	movaps	-5(%ecx), %xmm1
897	movaps	11(%ecx), %xmm2
898L(Shl5Start):
899	pcmpeqb	%xmm2, %xmm0
900	pmovmskb %xmm0, %eax
901	movaps	%xmm2, %xmm3
902#ifdef USE_AS_STRNCPY
903	sub	$16, %ebx
904	jbe	L(StrncpyExit5Case2OrCase3)
905#endif
906	test	%eax, %eax
907	jnz	L(Shl5LoopExit)
908
909	palignr	$5, %xmm1, %xmm2
910	movaps	%xmm3, %xmm1
911	movaps	%xmm2, (%edx)
912	movaps	27(%ecx), %xmm2
913
914	pcmpeqb	%xmm2, %xmm0
915	lea	16(%edx), %edx
916	pmovmskb %xmm0, %eax
917	lea	16(%ecx), %ecx
918	movaps	%xmm2, %xmm3
919#ifdef USE_AS_STRNCPY
920	sub	$16, %ebx
921	jbe	L(StrncpyExit5Case2OrCase3)
922#endif
923	test	%eax, %eax
924	jnz	L(Shl5LoopExit)
925
926	palignr	$5, %xmm1, %xmm2
927	movaps	%xmm2, (%edx)
928	movaps	27(%ecx), %xmm2
929	movaps	%xmm3, %xmm1
930
931	pcmpeqb	%xmm2, %xmm0
932	lea	16(%edx), %edx
933	pmovmskb %xmm0, %eax
934	lea	16(%ecx), %ecx
935	movaps	%xmm2, %xmm3
936#ifdef USE_AS_STRNCPY
937	sub	$16, %ebx
938	jbe	L(StrncpyExit5Case2OrCase3)
939#endif
940	test	%eax, %eax
941	jnz	L(Shl5LoopExit)
942
943	palignr	$5, %xmm1, %xmm2
944	movaps	%xmm2, (%edx)
945	movaps	27(%ecx), %xmm2
946
947	pcmpeqb	%xmm2, %xmm0
948	lea	16(%edx), %edx
949	pmovmskb %xmm0, %eax
950	lea	16(%ecx), %ecx
951#ifdef USE_AS_STRNCPY
952	sub	$16, %ebx
953	jbe	L(StrncpyExit5Case2OrCase3)
954#endif
955	test	%eax, %eax
956	jnz	L(Shl5LoopExit)
957
958	palignr	$5, %xmm3, %xmm2
959	movaps	%xmm2, (%edx)
960	lea	27(%ecx), %ecx
961	lea	16(%edx), %edx
962
963	mov	%ecx, %eax
964	and	$-0x40, %ecx
965	sub	%ecx, %eax
966	lea	-11(%ecx), %ecx
967	sub	%eax, %edx
968#ifdef USE_AS_STRNCPY
969	add	%eax, %ebx
970#endif
971	movaps	-5(%ecx), %xmm1
972
973L(Shl5LoopStart):
974	movaps	11(%ecx), %xmm2
975	movaps	27(%ecx), %xmm3
976	movaps	%xmm3, %xmm6
977	movaps	43(%ecx), %xmm4
978	movaps	%xmm4, %xmm7
979	movaps	59(%ecx), %xmm5
980	pminub	%xmm2, %xmm6
981	pminub	%xmm5, %xmm7
982	pminub	%xmm6, %xmm7
983	pcmpeqb	%xmm0, %xmm7
984	pmovmskb %xmm7, %eax
985	movaps	%xmm5, %xmm7
986	palignr	$5, %xmm4, %xmm5
987	palignr	$5, %xmm3, %xmm4
988	test	%eax, %eax
989	jnz	L(Shl5Start)
990#ifdef USE_AS_STRNCPY
991	sub	$64, %ebx
992	jbe	L(StrncpyLeave5)
993#endif
994	palignr	$5, %xmm2, %xmm3
995	lea	64(%ecx), %ecx
996	palignr	$5, %xmm1, %xmm2
997	movaps	%xmm7, %xmm1
998	movaps	%xmm5, 48(%edx)
999	movaps	%xmm4, 32(%edx)
1000	movaps	%xmm3, 16(%edx)
1001	movaps	%xmm2, (%edx)
1002	lea	64(%edx), %edx
1003	jmp	L(Shl5LoopStart)
1004
1005L(Shl5LoopExit):
1006	movlpd	(%ecx), %xmm0
1007	movl	7(%ecx), %esi
1008	movlpd	%xmm0, (%edx)
1009	movl	%esi, 7(%edx)
1010	mov	$11, %esi
1011	jmp	L(CopyFrom1To16Bytes)
1012
1013	.p2align 4
1014L(Shl6):
1015	movaps	-6(%ecx), %xmm1
1016	movaps	10(%ecx), %xmm2
1017L(Shl6Start):
1018	pcmpeqb	%xmm2, %xmm0
1019	pmovmskb %xmm0, %eax
1020	movaps	%xmm2, %xmm3
1021#ifdef USE_AS_STRNCPY
1022	sub	$16, %ebx
1023	jbe	L(StrncpyExit6Case2OrCase3)
1024#endif
1025	test	%eax, %eax
1026	jnz	L(Shl6LoopExit)
1027
1028	palignr	$6, %xmm1, %xmm2
1029	movaps	%xmm3, %xmm1
1030	movaps	%xmm2, (%edx)
1031	movaps	26(%ecx), %xmm2
1032
1033	pcmpeqb	%xmm2, %xmm0
1034	lea	16(%edx), %edx
1035	pmovmskb %xmm0, %eax
1036	lea	16(%ecx), %ecx
1037	movaps	%xmm2, %xmm3
1038#ifdef USE_AS_STRNCPY
1039	sub	$16, %ebx
1040	jbe	L(StrncpyExit6Case2OrCase3)
1041#endif
1042	test	%eax, %eax
1043	jnz	L(Shl6LoopExit)
1044
1045	palignr	$6, %xmm1, %xmm2
1046	movaps	%xmm2, (%edx)
1047	movaps	26(%ecx), %xmm2
1048	movaps	%xmm3, %xmm1
1049
1050	pcmpeqb	%xmm2, %xmm0
1051	lea	16(%edx), %edx
1052	pmovmskb %xmm0, %eax
1053	lea	16(%ecx), %ecx
1054	movaps	%xmm2, %xmm3
1055#ifdef USE_AS_STRNCPY
1056	sub	$16, %ebx
1057	jbe	L(StrncpyExit6Case2OrCase3)
1058#endif
1059	test	%eax, %eax
1060	jnz	L(Shl6LoopExit)
1061
1062	palignr	$6, %xmm1, %xmm2
1063	movaps	%xmm2, (%edx)
1064	movaps	26(%ecx), %xmm2
1065
1066	pcmpeqb	%xmm2, %xmm0
1067	lea	16(%edx), %edx
1068	pmovmskb %xmm0, %eax
1069	lea	16(%ecx), %ecx
1070#ifdef USE_AS_STRNCPY
1071	sub	$16, %ebx
1072	jbe	L(StrncpyExit6Case2OrCase3)
1073#endif
1074	test	%eax, %eax
1075	jnz	L(Shl6LoopExit)
1076
1077	palignr	$6, %xmm3, %xmm2
1078	movaps	%xmm2, (%edx)
1079	lea	26(%ecx), %ecx
1080	lea	16(%edx), %edx
1081
1082	mov	%ecx, %eax
1083	and	$-0x40, %ecx
1084	sub	%ecx, %eax
1085	lea	-10(%ecx), %ecx
1086	sub	%eax, %edx
1087#ifdef USE_AS_STRNCPY
1088	add	%eax, %ebx
1089#endif
1090	movaps	-6(%ecx), %xmm1
1091
1092L(Shl6LoopStart):
1093	movaps	10(%ecx), %xmm2
1094	movaps	26(%ecx), %xmm3
1095	movaps	%xmm3, %xmm6
1096	movaps	42(%ecx), %xmm4
1097	movaps	%xmm4, %xmm7
1098	movaps	58(%ecx), %xmm5
1099	pminub	%xmm2, %xmm6
1100	pminub	%xmm5, %xmm7
1101	pminub	%xmm6, %xmm7
1102	pcmpeqb	%xmm0, %xmm7
1103	pmovmskb %xmm7, %eax
1104	movaps	%xmm5, %xmm7
1105	palignr	$6, %xmm4, %xmm5
1106	palignr	$6, %xmm3, %xmm4
1107	test	%eax, %eax
1108	jnz	L(Shl6Start)
1109#ifdef USE_AS_STRNCPY
1110	sub	$64, %ebx
1111	jbe	L(StrncpyLeave6)
1112#endif
1113	palignr	$6, %xmm2, %xmm3
1114	lea	64(%ecx), %ecx
1115	palignr	$6, %xmm1, %xmm2
1116	movaps	%xmm7, %xmm1
1117	movaps	%xmm5, 48(%edx)
1118	movaps	%xmm4, 32(%edx)
1119	movaps	%xmm3, 16(%edx)
1120	movaps	%xmm2, (%edx)
1121	lea	64(%edx), %edx
1122	jmp	L(Shl6LoopStart)
1123
1124L(Shl6LoopExit):
1125	movlpd	(%ecx), %xmm0
1126	movl	6(%ecx), %esi
1127	movlpd	%xmm0, (%edx)
1128	movl	%esi, 6(%edx)
1129	mov	$10, %esi
1130	jmp	L(CopyFrom1To16Bytes)
1131
1132	.p2align 4
1133L(Shl7):
1134	movaps	-7(%ecx), %xmm1
1135	movaps	9(%ecx), %xmm2
1136L(Shl7Start):
1137	pcmpeqb	%xmm2, %xmm0
1138	pmovmskb %xmm0, %eax
1139	movaps	%xmm2, %xmm3
1140#ifdef USE_AS_STRNCPY
1141	sub	$16, %ebx
1142	jbe	L(StrncpyExit7Case2OrCase3)
1143#endif
1144	test	%eax, %eax
1145	jnz	L(Shl7LoopExit)
1146
1147	palignr	$7, %xmm1, %xmm2
1148	movaps	%xmm3, %xmm1
1149	movaps	%xmm2, (%edx)
1150	movaps	25(%ecx), %xmm2
1151
1152	pcmpeqb	%xmm2, %xmm0
1153	lea	16(%edx), %edx
1154	pmovmskb %xmm0, %eax
1155	lea	16(%ecx), %ecx
1156	movaps	%xmm2, %xmm3
1157#ifdef USE_AS_STRNCPY
1158	sub	$16, %ebx
1159	jbe	L(StrncpyExit7Case2OrCase3)
1160#endif
1161	test	%eax, %eax
1162	jnz	L(Shl7LoopExit)
1163
1164	palignr	$7, %xmm1, %xmm2
1165	movaps	%xmm2, (%edx)
1166	movaps	25(%ecx), %xmm2
1167	movaps	%xmm3, %xmm1
1168
1169	pcmpeqb	%xmm2, %xmm0
1170	lea	16(%edx), %edx
1171	pmovmskb %xmm0, %eax
1172	lea	16(%ecx), %ecx
1173	movaps	%xmm2, %xmm3
1174#ifdef USE_AS_STRNCPY
1175	sub	$16, %ebx
1176	jbe	L(StrncpyExit7Case2OrCase3)
1177#endif
1178	test	%eax, %eax
1179	jnz	L(Shl7LoopExit)
1180
1181	palignr	$7, %xmm1, %xmm2
1182	movaps	%xmm2, (%edx)
1183	movaps	25(%ecx), %xmm2
1184
1185	pcmpeqb	%xmm2, %xmm0
1186	lea	16(%edx), %edx
1187	pmovmskb %xmm0, %eax
1188	lea	16(%ecx), %ecx
1189#ifdef USE_AS_STRNCPY
1190	sub	$16, %ebx
1191	jbe	L(StrncpyExit7Case2OrCase3)
1192#endif
1193	test	%eax, %eax
1194	jnz	L(Shl7LoopExit)
1195
1196	palignr	$7, %xmm3, %xmm2
1197	movaps	%xmm2, (%edx)
1198	lea	25(%ecx), %ecx
1199	lea	16(%edx), %edx
1200
1201	mov	%ecx, %eax
1202	and	$-0x40, %ecx
1203	sub	%ecx, %eax
1204	lea	-9(%ecx), %ecx
1205	sub	%eax, %edx
1206#ifdef USE_AS_STRNCPY
1207	add	%eax, %ebx
1208#endif
1209	movaps	-7(%ecx), %xmm1
1210
1211L(Shl7LoopStart):
1212	movaps	9(%ecx), %xmm2
1213	movaps	25(%ecx), %xmm3
1214	movaps	%xmm3, %xmm6
1215	movaps	41(%ecx), %xmm4
1216	movaps	%xmm4, %xmm7
1217	movaps	57(%ecx), %xmm5
1218	pminub	%xmm2, %xmm6
1219	pminub	%xmm5, %xmm7
1220	pminub	%xmm6, %xmm7
1221	pcmpeqb	%xmm0, %xmm7
1222	pmovmskb %xmm7, %eax
1223	movaps	%xmm5, %xmm7
1224	palignr	$7, %xmm4, %xmm5
1225	palignr	$7, %xmm3, %xmm4
1226	test	%eax, %eax
1227	jnz	L(Shl7Start)
1228#ifdef USE_AS_STRNCPY
1229	sub	$64, %ebx
1230	jbe	L(StrncpyLeave7)
1231#endif
1232	palignr	$7, %xmm2, %xmm3
1233	lea	64(%ecx), %ecx
1234	palignr	$7, %xmm1, %xmm2
1235	movaps	%xmm7, %xmm1
1236	movaps	%xmm5, 48(%edx)
1237	movaps	%xmm4, 32(%edx)
1238	movaps	%xmm3, 16(%edx)
1239	movaps	%xmm2, (%edx)
1240	lea	64(%edx), %edx
1241	jmp	L(Shl7LoopStart)
1242
1243L(Shl7LoopExit):
1244	movlpd	(%ecx), %xmm0
1245	movl	5(%ecx), %esi
1246	movlpd	%xmm0, (%edx)
1247	movl	%esi, 5(%edx)
1248	mov	$9, %esi
1249	jmp	L(CopyFrom1To16Bytes)
1250
1251	.p2align 4
1252L(Shl8):
1253	movaps	-8(%ecx), %xmm1
1254	movaps	8(%ecx), %xmm2
1255L(Shl8Start):
1256	pcmpeqb	%xmm2, %xmm0
1257	pmovmskb %xmm0, %eax
1258	movaps	%xmm2, %xmm3
1259#ifdef USE_AS_STRNCPY
1260	sub	$16, %ebx
1261	jbe	L(StrncpyExit8Case2OrCase3)
1262#endif
1263	test	%eax, %eax
1264	jnz	L(Shl8LoopExit)
1265
1266	palignr	$8, %xmm1, %xmm2
1267	movaps	%xmm3, %xmm1
1268	movaps	%xmm2, (%edx)
1269	movaps	24(%ecx), %xmm2
1270
1271	pcmpeqb	%xmm2, %xmm0
1272	lea	16(%edx), %edx
1273	pmovmskb %xmm0, %eax
1274	lea	16(%ecx), %ecx
1275	movaps	%xmm2, %xmm3
1276#ifdef USE_AS_STRNCPY
1277	sub	$16, %ebx
1278	jbe	L(StrncpyExit8Case2OrCase3)
1279#endif
1280	test	%eax, %eax
1281	jnz	L(Shl8LoopExit)
1282
1283	palignr	$8, %xmm1, %xmm2
1284	movaps	%xmm2, (%edx)
1285	movaps	24(%ecx), %xmm2
1286	movaps	%xmm3, %xmm1
1287
1288	pcmpeqb	%xmm2, %xmm0
1289	lea	16(%edx), %edx
1290	pmovmskb %xmm0, %eax
1291	lea	16(%ecx), %ecx
1292	movaps	%xmm2, %xmm3
1293#ifdef USE_AS_STRNCPY
1294	sub	$16, %ebx
1295	jbe	L(StrncpyExit8Case2OrCase3)
1296#endif
1297	test	%eax, %eax
1298	jnz	L(Shl8LoopExit)
1299
1300	palignr	$8, %xmm1, %xmm2
1301	movaps	%xmm2, (%edx)
1302	movaps	24(%ecx), %xmm2
1303
1304	pcmpeqb	%xmm2, %xmm0
1305	lea	16(%edx), %edx
1306	pmovmskb %xmm0, %eax
1307	lea	16(%ecx), %ecx
1308#ifdef USE_AS_STRNCPY
1309	sub	$16, %ebx
1310	jbe	L(StrncpyExit8Case2OrCase3)
1311#endif
1312	test	%eax, %eax
1313	jnz	L(Shl8LoopExit)
1314
1315	palignr	$8, %xmm3, %xmm2
1316	movaps	%xmm2, (%edx)
1317	lea	24(%ecx), %ecx
1318	lea	16(%edx), %edx
1319
1320	mov	%ecx, %eax
1321	and	$-0x40, %ecx
1322	sub	%ecx, %eax
1323	lea	-8(%ecx), %ecx
1324	sub	%eax, %edx
1325#ifdef USE_AS_STRNCPY
1326	add	%eax, %ebx
1327#endif
1328	movaps	-8(%ecx), %xmm1
1329
1330L(Shl8LoopStart):
1331	movaps	8(%ecx), %xmm2
1332	movaps	24(%ecx), %xmm3
1333	movaps	%xmm3, %xmm6
1334	movaps	40(%ecx), %xmm4
1335	movaps	%xmm4, %xmm7
1336	movaps	56(%ecx), %xmm5
1337	pminub	%xmm2, %xmm6
1338	pminub	%xmm5, %xmm7
1339	pminub	%xmm6, %xmm7
1340	pcmpeqb	%xmm0, %xmm7
1341	pmovmskb %xmm7, %eax
1342	movaps	%xmm5, %xmm7
1343	palignr	$8, %xmm4, %xmm5
1344	palignr	$8, %xmm3, %xmm4
1345	test	%eax, %eax
1346	jnz	L(Shl8Start)
1347#ifdef USE_AS_STRNCPY
1348	sub	$64, %ebx
1349	jbe	L(StrncpyLeave8)
1350#endif
1351	palignr	$8, %xmm2, %xmm3
1352	lea	64(%ecx), %ecx
1353	palignr	$8, %xmm1, %xmm2
1354	movaps	%xmm7, %xmm1
1355	movaps	%xmm5, 48(%edx)
1356	movaps	%xmm4, 32(%edx)
1357	movaps	%xmm3, 16(%edx)
1358	movaps	%xmm2, (%edx)
1359	lea	64(%edx), %edx
1360	jmp	L(Shl8LoopStart)
1361
1362L(Shl8LoopExit):
1363	movlpd	(%ecx), %xmm0
1364	movlpd	%xmm0, (%edx)
1365	mov	$8, %esi
1366	jmp	L(CopyFrom1To16Bytes)
1367
/*
 * Shl9 -- copy loop for a source cursor that sits 9 bytes past a 16-byte
 * boundary.  All source loads are movaps from -9(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $9 joins two neighbouring chunks into the 16 bytes
 * that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 9) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit9Case2OrCase3)
 *                (outside this chunk) takes over
 */
1368	.p2align 4
1369L(Shl9):
1370	movaps	-9(%ecx), %xmm1
1371	movaps	7(%ecx), %xmm2
1372L(Shl9Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1373	pcmpeqb	%xmm2, %xmm0
1374	pmovmskb %xmm0, %eax
1375	movaps	%xmm2, %xmm3
1376#ifdef USE_AS_STRNCPY
1377	sub	$16, %ebx
1378	jbe	L(StrncpyExit9Case2OrCase3)
1379#endif
1380	test	%eax, %eax
1381	jnz	L(Shl9LoopExit)
1382
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1383	palignr	$9, %xmm1, %xmm2
1384	movaps	%xmm3, %xmm1
1385	movaps	%xmm2, (%edx)
1386	movaps	23(%ecx), %xmm2
1387
1388	pcmpeqb	%xmm2, %xmm0
1389	lea	16(%edx), %edx
1390	pmovmskb %xmm0, %eax
1391	lea	16(%ecx), %ecx
1392	movaps	%xmm2, %xmm3
1393#ifdef USE_AS_STRNCPY
1394	sub	$16, %ebx
1395	jbe	L(StrncpyExit9Case2OrCase3)
1396#endif
1397	test	%eax, %eax
1398	jnz	L(Shl9LoopExit)
1399
	/* Step 2 of 4.  */
1400	palignr	$9, %xmm1, %xmm2
1401	movaps	%xmm2, (%edx)
1402	movaps	23(%ecx), %xmm2
1403	movaps	%xmm3, %xmm1
1404
1405	pcmpeqb	%xmm2, %xmm0
1406	lea	16(%edx), %edx
1407	pmovmskb %xmm0, %eax
1408	lea	16(%ecx), %ecx
1409	movaps	%xmm2, %xmm3
1410#ifdef USE_AS_STRNCPY
1411	sub	$16, %ebx
1412	jbe	L(StrncpyExit9Case2OrCase3)
1413#endif
1414	test	%eax, %eax
1415	jnz	L(Shl9LoopExit)
1416
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
1417	palignr	$9, %xmm1, %xmm2
1418	movaps	%xmm2, (%edx)
1419	movaps	23(%ecx), %xmm2
1420
1421	pcmpeqb	%xmm2, %xmm0
1422	lea	16(%edx), %edx
1423	pmovmskb %xmm0, %eax
1424	lea	16(%ecx), %ecx
1425#ifdef USE_AS_STRNCPY
1426	sub	$16, %ebx
1427	jbe	L(StrncpyExit9Case2OrCase3)
1428#endif
1429	test	%eax, %eax
1430	jnz	L(Shl9LoopExit)
1431
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
1432	palignr	$9, %xmm3, %xmm2
1433	movaps	%xmm2, (%edx)
1434	lea	23(%ecx), %ecx
1435	lea	16(%edx), %edx
1436
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
1437	mov	%ecx, %eax
1438	and	$-0x40, %ecx
1439	sub	%ecx, %eax
1440	lea	-7(%ecx), %ecx
1441	sub	%eax, %edx
1442#ifdef USE_AS_STRNCPY
1443	add	%eax, %ebx
1444#endif
1445	movaps	-9(%ecx), %xmm1
1446
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
1447L(Shl9LoopStart):
1448	movaps	7(%ecx), %xmm2
1449	movaps	23(%ecx), %xmm3
1450	movaps	%xmm3, %xmm6
1451	movaps	39(%ecx), %xmm4
1452	movaps	%xmm4, %xmm7
1453	movaps	55(%ecx), %xmm5
1454	pminub	%xmm2, %xmm6
1455	pminub	%xmm5, %xmm7
1456	pminub	%xmm6, %xmm7
1457	pcmpeqb	%xmm0, %xmm7
1458	pmovmskb %xmm7, %eax
1459	movaps	%xmm5, %xmm7
1460	palignr	$9, %xmm4, %xmm5
1461	palignr	$9, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl9Start).  */
1462	test	%eax, %eax
1463	jnz	L(Shl9Start)
1464#ifdef USE_AS_STRNCPY
1465	sub	$64, %ebx
1466	jbe	L(StrncpyLeave9)
1467#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
1468	palignr	$9, %xmm2, %xmm3
1469	lea	64(%ecx), %ecx
1470	palignr	$9, %xmm1, %xmm2
1471	movaps	%xmm7, %xmm1
1472	movaps	%xmm5, 48(%edx)
1473	movaps	%xmm4, 32(%edx)
1474	movaps	%xmm3, 16(%edx)
1475	movaps	%xmm2, (%edx)
1476	lea	64(%edx), %edx
1477	jmp	L(Shl9LoopStart)
1478
	/* Match found in the chunk at 7(%ecx): one 8-byte move from -1(%ecx)
	   covers the bytes up to offset 7 (offset -1 is rewritten), then
	   L(CopyFrom1To16Bytes) advances both cursors by %esi = 7 and
	   finishes from the mask in %eax.  */
1479L(Shl9LoopExit):
1480	movlpd	-1(%ecx), %xmm0
1481	movlpd	%xmm0, -1(%edx)
1482	mov	$7, %esi
1483	jmp	L(CopyFrom1To16Bytes)
1484
/*
 * Shl10 -- copy loop for a source cursor that sits 10 bytes past a 16-byte
 * boundary.  All source loads are movaps from -10(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $10 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 10) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit10Case2OrCase3)
 *                (outside this chunk) takes over
 */
1485	.p2align 4
1486L(Shl10):
1487	movaps	-10(%ecx), %xmm1
1488	movaps	6(%ecx), %xmm2
1489L(Shl10Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1490	pcmpeqb	%xmm2, %xmm0
1491	pmovmskb %xmm0, %eax
1492	movaps	%xmm2, %xmm3
1493#ifdef USE_AS_STRNCPY
1494	sub	$16, %ebx
1495	jbe	L(StrncpyExit10Case2OrCase3)
1496#endif
1497	test	%eax, %eax
1498	jnz	L(Shl10LoopExit)
1499
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1500	palignr	$10, %xmm1, %xmm2
1501	movaps	%xmm3, %xmm1
1502	movaps	%xmm2, (%edx)
1503	movaps	22(%ecx), %xmm2
1504
1505	pcmpeqb	%xmm2, %xmm0
1506	lea	16(%edx), %edx
1507	pmovmskb %xmm0, %eax
1508	lea	16(%ecx), %ecx
1509	movaps	%xmm2, %xmm3
1510#ifdef USE_AS_STRNCPY
1511	sub	$16, %ebx
1512	jbe	L(StrncpyExit10Case2OrCase3)
1513#endif
1514	test	%eax, %eax
1515	jnz	L(Shl10LoopExit)
1516
	/* Step 2 of 4.  */
1517	palignr	$10, %xmm1, %xmm2
1518	movaps	%xmm2, (%edx)
1519	movaps	22(%ecx), %xmm2
1520	movaps	%xmm3, %xmm1
1521
1522	pcmpeqb	%xmm2, %xmm0
1523	lea	16(%edx), %edx
1524	pmovmskb %xmm0, %eax
1525	lea	16(%ecx), %ecx
1526	movaps	%xmm2, %xmm3
1527#ifdef USE_AS_STRNCPY
1528	sub	$16, %ebx
1529	jbe	L(StrncpyExit10Case2OrCase3)
1530#endif
1531	test	%eax, %eax
1532	jnz	L(Shl10LoopExit)
1533
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
1534	palignr	$10, %xmm1, %xmm2
1535	movaps	%xmm2, (%edx)
1536	movaps	22(%ecx), %xmm2
1537
1538	pcmpeqb	%xmm2, %xmm0
1539	lea	16(%edx), %edx
1540	pmovmskb %xmm0, %eax
1541	lea	16(%ecx), %ecx
1542#ifdef USE_AS_STRNCPY
1543	sub	$16, %ebx
1544	jbe	L(StrncpyExit10Case2OrCase3)
1545#endif
1546	test	%eax, %eax
1547	jnz	L(Shl10LoopExit)
1548
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
1549	palignr	$10, %xmm3, %xmm2
1550	movaps	%xmm2, (%edx)
1551	lea	22(%ecx), %ecx
1552	lea	16(%edx), %edx
1553
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
1554	mov	%ecx, %eax
1555	and	$-0x40, %ecx
1556	sub	%ecx, %eax
1557	lea	-6(%ecx), %ecx
1558	sub	%eax, %edx
1559#ifdef USE_AS_STRNCPY
1560	add	%eax, %ebx
1561#endif
1562	movaps	-10(%ecx), %xmm1
1563
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
1564L(Shl10LoopStart):
1565	movaps	6(%ecx), %xmm2
1566	movaps	22(%ecx), %xmm3
1567	movaps	%xmm3, %xmm6
1568	movaps	38(%ecx), %xmm4
1569	movaps	%xmm4, %xmm7
1570	movaps	54(%ecx), %xmm5
1571	pminub	%xmm2, %xmm6
1572	pminub	%xmm5, %xmm7
1573	pminub	%xmm6, %xmm7
1574	pcmpeqb	%xmm0, %xmm7
1575	pmovmskb %xmm7, %eax
1576	movaps	%xmm5, %xmm7
1577	palignr	$10, %xmm4, %xmm5
1578	palignr	$10, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl10Start).  */
1579	test	%eax, %eax
1580	jnz	L(Shl10Start)
1581#ifdef USE_AS_STRNCPY
1582	sub	$64, %ebx
1583	jbe	L(StrncpyLeave10)
1584#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
1585	palignr	$10, %xmm2, %xmm3
1586	lea	64(%ecx), %ecx
1587	palignr	$10, %xmm1, %xmm2
1588	movaps	%xmm7, %xmm1
1589	movaps	%xmm5, 48(%edx)
1590	movaps	%xmm4, 32(%edx)
1591	movaps	%xmm3, 16(%edx)
1592	movaps	%xmm2, (%edx)
1593	lea	64(%edx), %edx
1594	jmp	L(Shl10LoopStart)
1595
	/* Match found in the chunk at 6(%ecx): one 8-byte move from -2(%ecx)
	   covers the bytes up to offset 6 (offsets -2 and -1 are rewritten),
	   then L(CopyFrom1To16Bytes) advances both cursors by %esi = 6 and
	   finishes from the mask in %eax.  */
1596L(Shl10LoopExit):
1597	movlpd	-2(%ecx), %xmm0
1598	movlpd	%xmm0, -2(%edx)
1599	mov	$6, %esi
1600	jmp	L(CopyFrom1To16Bytes)
1601
/*
 * Shl11 -- copy loop for a source cursor that sits 11 bytes past a 16-byte
 * boundary.  All source loads are movaps from -11(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $11 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 11) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit11Case2OrCase3)
 *                (outside this chunk) takes over
 */
1602	.p2align 4
1603L(Shl11):
1604	movaps	-11(%ecx), %xmm1
1605	movaps	5(%ecx), %xmm2
1606L(Shl11Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1607	pcmpeqb	%xmm2, %xmm0
1608	pmovmskb %xmm0, %eax
1609	movaps	%xmm2, %xmm3
1610#ifdef USE_AS_STRNCPY
1611	sub	$16, %ebx
1612	jbe	L(StrncpyExit11Case2OrCase3)
1613#endif
1614	test	%eax, %eax
1615	jnz	L(Shl11LoopExit)
1616
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1617	palignr	$11, %xmm1, %xmm2
1618	movaps	%xmm3, %xmm1
1619	movaps	%xmm2, (%edx)
1620	movaps	21(%ecx), %xmm2
1621
1622	pcmpeqb	%xmm2, %xmm0
1623	lea	16(%edx), %edx
1624	pmovmskb %xmm0, %eax
1625	lea	16(%ecx), %ecx
1626	movaps	%xmm2, %xmm3
1627#ifdef USE_AS_STRNCPY
1628	sub	$16, %ebx
1629	jbe	L(StrncpyExit11Case2OrCase3)
1630#endif
1631	test	%eax, %eax
1632	jnz	L(Shl11LoopExit)
1633
	/* Step 2 of 4.  */
1634	palignr	$11, %xmm1, %xmm2
1635	movaps	%xmm2, (%edx)
1636	movaps	21(%ecx), %xmm2
1637	movaps	%xmm3, %xmm1
1638
1639	pcmpeqb	%xmm2, %xmm0
1640	lea	16(%edx), %edx
1641	pmovmskb %xmm0, %eax
1642	lea	16(%ecx), %ecx
1643	movaps	%xmm2, %xmm3
1644#ifdef USE_AS_STRNCPY
1645	sub	$16, %ebx
1646	jbe	L(StrncpyExit11Case2OrCase3)
1647#endif
1648	test	%eax, %eax
1649	jnz	L(Shl11LoopExit)
1650
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
1651	palignr	$11, %xmm1, %xmm2
1652	movaps	%xmm2, (%edx)
1653	movaps	21(%ecx), %xmm2
1654
1655	pcmpeqb	%xmm2, %xmm0
1656	lea	16(%edx), %edx
1657	pmovmskb %xmm0, %eax
1658	lea	16(%ecx), %ecx
1659#ifdef USE_AS_STRNCPY
1660	sub	$16, %ebx
1661	jbe	L(StrncpyExit11Case2OrCase3)
1662#endif
1663	test	%eax, %eax
1664	jnz	L(Shl11LoopExit)
1665
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
1666	palignr	$11, %xmm3, %xmm2
1667	movaps	%xmm2, (%edx)
1668	lea	21(%ecx), %ecx
1669	lea	16(%edx), %edx
1670
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
1671	mov	%ecx, %eax
1672	and	$-0x40, %ecx
1673	sub	%ecx, %eax
1674	lea	-5(%ecx), %ecx
1675	sub	%eax, %edx
1676#ifdef USE_AS_STRNCPY
1677	add	%eax, %ebx
1678#endif
1679	movaps	-11(%ecx), %xmm1
1680
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
1681L(Shl11LoopStart):
1682	movaps	5(%ecx), %xmm2
1683	movaps	21(%ecx), %xmm3
1684	movaps	%xmm3, %xmm6
1685	movaps	37(%ecx), %xmm4
1686	movaps	%xmm4, %xmm7
1687	movaps	53(%ecx), %xmm5
1688	pminub	%xmm2, %xmm6
1689	pminub	%xmm5, %xmm7
1690	pminub	%xmm6, %xmm7
1691	pcmpeqb	%xmm0, %xmm7
1692	pmovmskb %xmm7, %eax
1693	movaps	%xmm5, %xmm7
1694	palignr	$11, %xmm4, %xmm5
1695	palignr	$11, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl11Start).  */
1696	test	%eax, %eax
1697	jnz	L(Shl11Start)
1698#ifdef USE_AS_STRNCPY
1699	sub	$64, %ebx
1700	jbe	L(StrncpyLeave11)
1701#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
1702	palignr	$11, %xmm2, %xmm3
1703	lea	64(%ecx), %ecx
1704	palignr	$11, %xmm1, %xmm2
1705	movaps	%xmm7, %xmm1
1706	movaps	%xmm5, 48(%edx)
1707	movaps	%xmm4, 32(%edx)
1708	movaps	%xmm3, 16(%edx)
1709	movaps	%xmm2, (%edx)
1710	lea	64(%edx), %edx
1711	jmp	L(Shl11LoopStart)
1712
	/* Match found in the chunk at 5(%ecx): one 8-byte move from -3(%ecx)
	   covers the bytes up to offset 5 (offsets -3..-1 are rewritten),
	   then L(CopyFrom1To16Bytes) advances both cursors by %esi = 5 and
	   finishes from the mask in %eax.  */
1713L(Shl11LoopExit):
1714	movlpd	-3(%ecx), %xmm0
1715	movlpd	%xmm0, -3(%edx)
1716	mov	$5, %esi
1717	jmp	L(CopyFrom1To16Bytes)
1718
/*
 * Shl12 -- copy loop for a source cursor that sits 12 bytes past a 16-byte
 * boundary.  All source loads are movaps from -12(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $12 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 12) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit12Case2OrCase3)
 *                (outside this chunk) takes over
 */
1719	.p2align 4
1720L(Shl12):
1721	movaps	-12(%ecx), %xmm1
1722	movaps	4(%ecx), %xmm2
1723L(Shl12Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1724	pcmpeqb	%xmm2, %xmm0
1725	pmovmskb %xmm0, %eax
1726	movaps	%xmm2, %xmm3
1727#ifdef USE_AS_STRNCPY
1728	sub	$16, %ebx
1729	jbe	L(StrncpyExit12Case2OrCase3)
1730#endif
1731	test	%eax, %eax
1732	jnz	L(Shl12LoopExit)
1733
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1734	palignr	$12, %xmm1, %xmm2
1735	movaps	%xmm3, %xmm1
1736	movaps	%xmm2, (%edx)
1737	movaps	20(%ecx), %xmm2
1738
1739	pcmpeqb	%xmm2, %xmm0
1740	lea	16(%edx), %edx
1741	pmovmskb %xmm0, %eax
1742	lea	16(%ecx), %ecx
1743	movaps	%xmm2, %xmm3
1744#ifdef USE_AS_STRNCPY
1745	sub	$16, %ebx
1746	jbe	L(StrncpyExit12Case2OrCase3)
1747#endif
1748	test	%eax, %eax
1749	jnz	L(Shl12LoopExit)
1750
	/* Step 2 of 4.  */
1751	palignr	$12, %xmm1, %xmm2
1752	movaps	%xmm2, (%edx)
1753	movaps	20(%ecx), %xmm2
1754	movaps	%xmm3, %xmm1
1755
1756	pcmpeqb	%xmm2, %xmm0
1757	lea	16(%edx), %edx
1758	pmovmskb %xmm0, %eax
1759	lea	16(%ecx), %ecx
1760	movaps	%xmm2, %xmm3
1761#ifdef USE_AS_STRNCPY
1762	sub	$16, %ebx
1763	jbe	L(StrncpyExit12Case2OrCase3)
1764#endif
1765	test	%eax, %eax
1766	jnz	L(Shl12LoopExit)
1767
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
1768	palignr	$12, %xmm1, %xmm2
1769	movaps	%xmm2, (%edx)
1770	movaps	20(%ecx), %xmm2
1771
1772	pcmpeqb	%xmm2, %xmm0
1773	lea	16(%edx), %edx
1774	pmovmskb %xmm0, %eax
1775	lea	16(%ecx), %ecx
1776#ifdef USE_AS_STRNCPY
1777	sub	$16, %ebx
1778	jbe	L(StrncpyExit12Case2OrCase3)
1779#endif
1780	test	%eax, %eax
1781	jnz	L(Shl12LoopExit)
1782
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
1783	palignr	$12, %xmm3, %xmm2
1784	movaps	%xmm2, (%edx)
1785	lea	20(%ecx), %ecx
1786	lea	16(%edx), %edx
1787
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
1788	mov	%ecx, %eax
1789	and	$-0x40, %ecx
1790	sub	%ecx, %eax
1791	lea	-4(%ecx), %ecx
1792	sub	%eax, %edx
1793#ifdef USE_AS_STRNCPY
1794	add	%eax, %ebx
1795#endif
1796	movaps	-12(%ecx), %xmm1
1797
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
1798L(Shl12LoopStart):
1799	movaps	4(%ecx), %xmm2
1800	movaps	20(%ecx), %xmm3
1801	movaps	%xmm3, %xmm6
1802	movaps	36(%ecx), %xmm4
1803	movaps	%xmm4, %xmm7
1804	movaps	52(%ecx), %xmm5
1805	pminub	%xmm2, %xmm6
1806	pminub	%xmm5, %xmm7
1807	pminub	%xmm6, %xmm7
1808	pcmpeqb	%xmm0, %xmm7
1809	pmovmskb %xmm7, %eax
1810	movaps	%xmm5, %xmm7
1811	palignr	$12, %xmm4, %xmm5
1812	palignr	$12, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl12Start).  */
1813	test	%eax, %eax
1814	jnz	L(Shl12Start)
1815#ifdef USE_AS_STRNCPY
1816	sub	$64, %ebx
1817	jbe	L(StrncpyLeave12)
1818#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
1819	palignr	$12, %xmm2, %xmm3
1820	lea	64(%ecx), %ecx
1821	palignr	$12, %xmm1, %xmm2
1822	movaps	%xmm7, %xmm1
1823	movaps	%xmm5, 48(%edx)
1824	movaps	%xmm4, 32(%edx)
1825	movaps	%xmm3, 16(%edx)
1826	movaps	%xmm2, (%edx)
1827	lea	64(%edx), %edx
1828	jmp	L(Shl12LoopStart)
1829
	/* Match found in the chunk at 4(%ecx): copy the 4 bytes at (%ecx)
	   using %esi as scratch, then set %esi = 4 so that
	   L(CopyFrom1To16Bytes) advances both cursors to that chunk and
	   finishes from the mask in %eax.  */
1830L(Shl12LoopExit):
1831	movl	(%ecx), %esi
1832	movl	%esi, (%edx)
1833	mov	$4, %esi
1834	jmp	L(CopyFrom1To16Bytes)
1835
/*
 * Shl13 -- copy loop for a source cursor that sits 13 bytes past a 16-byte
 * boundary.  All source loads are movaps from -13(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $13 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 13) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit13Case2OrCase3)
 *                (outside this chunk) takes over
 */
1836	.p2align 4
1837L(Shl13):
1838	movaps	-13(%ecx), %xmm1
1839	movaps	3(%ecx), %xmm2
1840L(Shl13Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1841	pcmpeqb	%xmm2, %xmm0
1842	pmovmskb %xmm0, %eax
1843	movaps	%xmm2, %xmm3
1844#ifdef USE_AS_STRNCPY
1845	sub	$16, %ebx
1846	jbe	L(StrncpyExit13Case2OrCase3)
1847#endif
1848	test	%eax, %eax
1849	jnz	L(Shl13LoopExit)
1850
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1851	palignr	$13, %xmm1, %xmm2
1852	movaps	%xmm3, %xmm1
1853	movaps	%xmm2, (%edx)
1854	movaps	19(%ecx), %xmm2
1855
1856	pcmpeqb	%xmm2, %xmm0
1857	lea	16(%edx), %edx
1858	pmovmskb %xmm0, %eax
1859	lea	16(%ecx), %ecx
1860	movaps	%xmm2, %xmm3
1861#ifdef USE_AS_STRNCPY
1862	sub	$16, %ebx
1863	jbe	L(StrncpyExit13Case2OrCase3)
1864#endif
1865	test	%eax, %eax
1866	jnz	L(Shl13LoopExit)
1867
	/* Step 2 of 4.  */
1868	palignr	$13, %xmm1, %xmm2
1869	movaps	%xmm2, (%edx)
1870	movaps	19(%ecx), %xmm2
1871	movaps	%xmm3, %xmm1
1872
1873	pcmpeqb	%xmm2, %xmm0
1874	lea	16(%edx), %edx
1875	pmovmskb %xmm0, %eax
1876	lea	16(%ecx), %ecx
1877	movaps	%xmm2, %xmm3
1878#ifdef USE_AS_STRNCPY
1879	sub	$16, %ebx
1880	jbe	L(StrncpyExit13Case2OrCase3)
1881#endif
1882	test	%eax, %eax
1883	jnz	L(Shl13LoopExit)
1884
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
1885	palignr	$13, %xmm1, %xmm2
1886	movaps	%xmm2, (%edx)
1887	movaps	19(%ecx), %xmm2
1888
1889	pcmpeqb	%xmm2, %xmm0
1890	lea	16(%edx), %edx
1891	pmovmskb %xmm0, %eax
1892	lea	16(%ecx), %ecx
1893#ifdef USE_AS_STRNCPY
1894	sub	$16, %ebx
1895	jbe	L(StrncpyExit13Case2OrCase3)
1896#endif
1897	test	%eax, %eax
1898	jnz	L(Shl13LoopExit)
1899
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
1900	palignr	$13, %xmm3, %xmm2
1901	movaps	%xmm2, (%edx)
1902	lea	19(%ecx), %ecx
1903	lea	16(%edx), %edx
1904
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
1905	mov	%ecx, %eax
1906	and	$-0x40, %ecx
1907	sub	%ecx, %eax
1908	lea	-3(%ecx), %ecx
1909	sub	%eax, %edx
1910#ifdef USE_AS_STRNCPY
1911	add	%eax, %ebx
1912#endif
1913	movaps	-13(%ecx), %xmm1
1914
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
1915L(Shl13LoopStart):
1916	movaps	3(%ecx), %xmm2
1917	movaps	19(%ecx), %xmm3
1918	movaps	%xmm3, %xmm6
1919	movaps	35(%ecx), %xmm4
1920	movaps	%xmm4, %xmm7
1921	movaps	51(%ecx), %xmm5
1922	pminub	%xmm2, %xmm6
1923	pminub	%xmm5, %xmm7
1924	pminub	%xmm6, %xmm7
1925	pcmpeqb	%xmm0, %xmm7
1926	pmovmskb %xmm7, %eax
1927	movaps	%xmm5, %xmm7
1928	palignr	$13, %xmm4, %xmm5
1929	palignr	$13, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl13Start).  */
1930	test	%eax, %eax
1931	jnz	L(Shl13Start)
1932#ifdef USE_AS_STRNCPY
1933	sub	$64, %ebx
1934	jbe	L(StrncpyLeave13)
1935#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
1936	palignr	$13, %xmm2, %xmm3
1937	lea	64(%ecx), %ecx
1938	palignr	$13, %xmm1, %xmm2
1939	movaps	%xmm7, %xmm1
1940	movaps	%xmm5, 48(%edx)
1941	movaps	%xmm4, 32(%edx)
1942	movaps	%xmm3, 16(%edx)
1943	movaps	%xmm2, (%edx)
1944	lea	64(%edx), %edx
1945	jmp	L(Shl13LoopStart)
1946
	/* Match found in the chunk at 3(%ecx): one 4-byte move from -1(%ecx)
	   (through %esi as scratch) covers the bytes up to offset 3, then
	   %esi = 3 lets L(CopyFrom1To16Bytes) advance both cursors to that
	   chunk and finish from the mask in %eax.  */
1947L(Shl13LoopExit):
1948	movl	-1(%ecx), %esi
1949	movl	%esi, -1(%edx)
1950	mov	$3, %esi
1951	jmp	L(CopyFrom1To16Bytes)
1952
/*
 * Shl14 -- copy loop for a source cursor that sits 14 bytes past a 16-byte
 * boundary.  All source loads are movaps from -14(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $14 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 14) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit14Case2OrCase3)
 *                (outside this chunk) takes over
 */
1953	.p2align 4
1954L(Shl14):
1955	movaps	-14(%ecx), %xmm1
1956	movaps	2(%ecx), %xmm2
1957L(Shl14Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
1958	pcmpeqb	%xmm2, %xmm0
1959	pmovmskb %xmm0, %eax
1960	movaps	%xmm2, %xmm3
1961#ifdef USE_AS_STRNCPY
1962	sub	$16, %ebx
1963	jbe	L(StrncpyExit14Case2OrCase3)
1964#endif
1965	test	%eax, %eax
1966	jnz	L(Shl14LoopExit)
1967
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
1968	palignr	$14, %xmm1, %xmm2
1969	movaps	%xmm3, %xmm1
1970	movaps	%xmm2, (%edx)
1971	movaps	18(%ecx), %xmm2
1972
1973	pcmpeqb	%xmm2, %xmm0
1974	lea	16(%edx), %edx
1975	pmovmskb %xmm0, %eax
1976	lea	16(%ecx), %ecx
1977	movaps	%xmm2, %xmm3
1978#ifdef USE_AS_STRNCPY
1979	sub	$16, %ebx
1980	jbe	L(StrncpyExit14Case2OrCase3)
1981#endif
1982	test	%eax, %eax
1983	jnz	L(Shl14LoopExit)
1984
	/* Step 2 of 4.  */
1985	palignr	$14, %xmm1, %xmm2
1986	movaps	%xmm2, (%edx)
1987	movaps	18(%ecx), %xmm2
1988	movaps	%xmm3, %xmm1
1989
1990	pcmpeqb	%xmm2, %xmm0
1991	lea	16(%edx), %edx
1992	pmovmskb %xmm0, %eax
1993	lea	16(%ecx), %ecx
1994	movaps	%xmm2, %xmm3
1995#ifdef USE_AS_STRNCPY
1996	sub	$16, %ebx
1997	jbe	L(StrncpyExit14Case2OrCase3)
1998#endif
1999	test	%eax, %eax
2000	jnz	L(Shl14LoopExit)
2001
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
2002	palignr	$14, %xmm1, %xmm2
2003	movaps	%xmm2, (%edx)
2004	movaps	18(%ecx), %xmm2
2005
2006	pcmpeqb	%xmm2, %xmm0
2007	lea	16(%edx), %edx
2008	pmovmskb %xmm0, %eax
2009	lea	16(%ecx), %ecx
2010#ifdef USE_AS_STRNCPY
2011	sub	$16, %ebx
2012	jbe	L(StrncpyExit14Case2OrCase3)
2013#endif
2014	test	%eax, %eax
2015	jnz	L(Shl14LoopExit)
2016
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
2017	palignr	$14, %xmm3, %xmm2
2018	movaps	%xmm2, (%edx)
2019	lea	18(%ecx), %ecx
2020	lea	16(%edx), %edx
2021
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
2022	mov	%ecx, %eax
2023	and	$-0x40, %ecx
2024	sub	%ecx, %eax
2025	lea	-2(%ecx), %ecx
2026	sub	%eax, %edx
2027#ifdef USE_AS_STRNCPY
2028	add	%eax, %ebx
2029#endif
2030	movaps	-14(%ecx), %xmm1
2031
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
2032L(Shl14LoopStart):
2033	movaps	2(%ecx), %xmm2
2034	movaps	18(%ecx), %xmm3
2035	movaps	%xmm3, %xmm6
2036	movaps	34(%ecx), %xmm4
2037	movaps	%xmm4, %xmm7
2038	movaps	50(%ecx), %xmm5
2039	pminub	%xmm2, %xmm6
2040	pminub	%xmm5, %xmm7
2041	pminub	%xmm6, %xmm7
2042	pcmpeqb	%xmm0, %xmm7
2043	pmovmskb %xmm7, %eax
2044	movaps	%xmm5, %xmm7
2045	palignr	$14, %xmm4, %xmm5
2046	palignr	$14, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl14Start).  */
2047	test	%eax, %eax
2048	jnz	L(Shl14Start)
2049#ifdef USE_AS_STRNCPY
2050	sub	$64, %ebx
2051	jbe	L(StrncpyLeave14)
2052#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
2053	palignr	$14, %xmm2, %xmm3
2054	lea	64(%ecx), %ecx
2055	palignr	$14, %xmm1, %xmm2
2056	movaps	%xmm7, %xmm1
2057	movaps	%xmm5, 48(%edx)
2058	movaps	%xmm4, 32(%edx)
2059	movaps	%xmm3, 16(%edx)
2060	movaps	%xmm2, (%edx)
2061	lea	64(%edx), %edx
2062	jmp	L(Shl14LoopStart)
2063
	/* Match found in the chunk at 2(%ecx): one 4-byte move from -2(%ecx)
	   (through %esi as scratch) covers the bytes up to offset 2, then
	   %esi = 2 lets L(CopyFrom1To16Bytes) advance both cursors to that
	   chunk and finish from the mask in %eax.  */
2064L(Shl14LoopExit):
2065	movl	-2(%ecx), %esi
2066	movl	%esi, -2(%edx)
2067	mov	$2, %esi
2068	jmp	L(CopyFrom1To16Bytes)
2069
/*
 * Shl15 -- copy loop for a source cursor that sits 15 bytes past a 16-byte
 * boundary.  All source loads are movaps from -15(%ecx) + 16*k, i.e. whole
 * aligned chunks; palignr $15 joins two neighbouring chunks into the 16
 * bytes that start at (%ecx), and movaps stores them at (%edx).
 * NOTE(review): movaps needs 16-byte aligned addresses, so (%ecx - 15) and
 * %edx are assumed to be aligned by the dispatch code outside this chunk.
 *
 * Registers as used below:
 *   %ecx / %edx  source / destination cursors
 *   %xmm0        compared against each chunk; presumably all zero on entry
 *                (TODO confirm) so that %eax becomes a mask of NUL bytes.
 *                A compare without a match leaves it all zero again.
 *   %xmm1        previous aligned chunk, %xmm2 current chunk, %xmm3 its copy
 *   %ebx         USE_AS_STRNCPY only: count reduced by 16 per chunk; when it
 *                reaches zero or borrows, L(StrncpyExit15Case2OrCase3)
 *                (outside this chunk) takes over
 */
2070	.p2align 4
2071L(Shl15):
2072	movaps	-15(%ecx), %xmm1
2073	movaps	1(%ecx), %xmm2
2074L(Shl15Start):
	/* Also entered from the 64-byte loop below with %xmm1/%xmm2 loaded.  */
2075	pcmpeqb	%xmm2, %xmm0
2076	pmovmskb %xmm0, %eax
2077	movaps	%xmm2, %xmm3
2078#ifdef USE_AS_STRNCPY
2079	sub	$16, %ebx
2080	jbe	L(StrncpyExit15Case2OrCase3)
2081#endif
2082	test	%eax, %eax
2083	jnz	L(Shl15LoopExit)
2084
	/* Step 1 of 4: store 16 bytes, then load and test the next chunk.  */
2085	palignr	$15, %xmm1, %xmm2
2086	movaps	%xmm3, %xmm1
2087	movaps	%xmm2, (%edx)
2088	movaps	17(%ecx), %xmm2
2089
2090	pcmpeqb	%xmm2, %xmm0
2091	lea	16(%edx), %edx
2092	pmovmskb %xmm0, %eax
2093	lea	16(%ecx), %ecx
2094	movaps	%xmm2, %xmm3
2095#ifdef USE_AS_STRNCPY
2096	sub	$16, %ebx
2097	jbe	L(StrncpyExit15Case2OrCase3)
2098#endif
2099	test	%eax, %eax
2100	jnz	L(Shl15LoopExit)
2101
	/* Step 2 of 4.  */
2102	palignr	$15, %xmm1, %xmm2
2103	movaps	%xmm2, (%edx)
2104	movaps	17(%ecx), %xmm2
2105	movaps	%xmm3, %xmm1
2106
2107	pcmpeqb	%xmm2, %xmm0
2108	lea	16(%edx), %edx
2109	pmovmskb %xmm0, %eax
2110	lea	16(%ecx), %ecx
2111	movaps	%xmm2, %xmm3
2112#ifdef USE_AS_STRNCPY
2113	sub	$16, %ebx
2114	jbe	L(StrncpyExit15Case2OrCase3)
2115#endif
2116	test	%eax, %eax
2117	jnz	L(Shl15LoopExit)
2118
	/* Step 3 of 4: %xmm3 is not refreshed afterwards, so it still holds
	   the chunk that precedes the one loaded here.  */
2119	palignr	$15, %xmm1, %xmm2
2120	movaps	%xmm2, (%edx)
2121	movaps	17(%ecx), %xmm2
2122
2123	pcmpeqb	%xmm2, %xmm0
2124	lea	16(%edx), %edx
2125	pmovmskb %xmm0, %eax
2126	lea	16(%ecx), %ecx
2127#ifdef USE_AS_STRNCPY
2128	sub	$16, %ebx
2129	jbe	L(StrncpyExit15Case2OrCase3)
2130#endif
2131	test	%eax, %eax
2132	jnz	L(Shl15LoopExit)
2133
	/* Step 4 of 4, using %xmm3 as the previous chunk.  */
2134	palignr	$15, %xmm3, %xmm2
2135	movaps	%xmm2, (%edx)
2136	lea	17(%ecx), %ecx
2137	lea	16(%edx), %edx
2138
	/* %ecx now addresses the next aligned chunk.  Round it down to a
	   64-byte boundary and move both cursors (and, for strncpy, the
	   count) back by the same distance %eax, so the loop below always
	   loads from 64-byte aligned addresses.  Bytes in that distance are
	   simply copied a second time.  */
2139	mov	%ecx, %eax
2140	and	$-0x40, %ecx
2141	sub	%ecx, %eax
2142	lea	-1(%ecx), %ecx
2143	sub	%eax, %edx
2144#ifdef USE_AS_STRNCPY
2145	add	%eax, %ebx
2146#endif
2147	movaps	-15(%ecx), %xmm1
2148
	/* Main loop, 64 bytes per pass.  pminub folds the four chunks into
	   one register so a single compare against %xmm0 tells whether any
	   of the 64 bytes matches.  */
2149L(Shl15LoopStart):
2150	movaps	1(%ecx), %xmm2
2151	movaps	17(%ecx), %xmm3
2152	movaps	%xmm3, %xmm6
2153	movaps	33(%ecx), %xmm4
2154	movaps	%xmm4, %xmm7
2155	movaps	49(%ecx), %xmm5
2156	pminub	%xmm2, %xmm6
2157	pminub	%xmm5, %xmm7
2158	pminub	%xmm6, %xmm7
2159	pcmpeqb	%xmm0, %xmm7
2160	pmovmskb %xmm7, %eax
2161	movaps	%xmm5, %xmm7
2162	palignr	$15, %xmm4, %xmm5
2163	palignr	$15, %xmm3, %xmm4
	/* A match somewhere in these 64 bytes: nothing has been stored yet,
	   so redo them 16 bytes at a time from L(Shl15Start).  */
2164	test	%eax, %eax
2165	jnz	L(Shl15Start)
2166#ifdef USE_AS_STRNCPY
2167	sub	$64, %ebx
2168	jbe	L(StrncpyLeave15)
2169#endif
	/* No match: finish the two remaining shifts, store all 64 bytes and
	   keep the last chunk (%xmm7) as the next pass's previous chunk.  */
2170	palignr	$15, %xmm2, %xmm3
2171	lea	64(%ecx), %ecx
2172	palignr	$15, %xmm1, %xmm2
2173	movaps	%xmm7, %xmm1
2174	movaps	%xmm5, 48(%edx)
2175	movaps	%xmm4, 32(%edx)
2176	movaps	%xmm3, 16(%edx)
2177	movaps	%xmm2, (%edx)
2178	lea	64(%edx), %edx
2179	jmp	L(Shl15LoopStart)
2180
	/* Match found in the chunk at 1(%ecx): one 4-byte move from -3(%ecx)
	   (through %esi as scratch) covers the bytes up to offset 1, then
	   %esi = 1 is the advance L(CopyFrom1To16Bytes) applies to both
	   cursors.  */
2181L(Shl15LoopExit):
2182	movl	-3(%ecx), %esi
2183	movl	%esi, -3(%edx)
2184	mov	$1, %esi
	/* Only the STRCAT/STRLCPY builds need an explicit jump.  In the other
	   builds L(CopyFrom1To16Bytes) is defined directly after this point,
	   so execution falls through the alignment padding into it.  */
2185#if defined USE_AS_STRCAT || defined USE_AS_STRLCPY
2186	jmp	L(CopyFrom1To16Bytes)
2187#endif
2188
2189
2190#if !defined USE_AS_STRCAT && !defined USE_AS_STRLCPY
2191
/*
 * CopyFrom1To16Bytes: finish a copy whose last byte lies in the 16 bytes
 * that start %esi bytes after the current cursors.
 *   In:  %eax  16-bit match mask (bit k set = byte k matched)
 *        %esi  distance to add to both %ecx (source) and %edx (destination)
 *        %ebx  USE_AS_STRNCPY only: count; 16 is added back here, undoing
 *              the "sub $16" the callers in this file perform before
 *              testing the mask
 * POP (%esi) is a macro defined outside this chunk; it is used here once
 * %esi is no longer needed (presumably restoring the caller's value).
 * The lowest set mask bit selects L(ExitK), K = bit index + 1.
 */
2192	.p2align 4
2193L(CopyFrom1To16Bytes):
2194# ifdef USE_AS_STRNCPY
2195	add	$16, %ebx
2196# endif
2197	add	%esi, %edx
2198	add	%esi, %ecx
2199
2200	POP	(%esi)
	/* Low mask byte empty: the match is in bytes 8..15.  */
2201	test	%al, %al
2202	jz	L(ExitHigh8)
2203
	/* Also a jump target of L(CopyFrom1To16BytesCase2).  %ah is used as
	   scratch for the low nibble; an empty nibble means bytes 4..7.  */
2204L(CopyFrom1To16BytesLess8):
2205	mov	%al, %ah
2206	and	$15, %ah
2207	jz	L(ExitHigh4)
2208
2209	test	$0x01, %al
2210	jnz	L(Exit1)
2211	test	$0x02, %al
2212	jnz	L(Exit2)
2213	test	$0x04, %al
2214	jnz	L(Exit3)
2215
	/* Fall-through case: bit 3 is the lowest set bit, copy 4 bytes.
	   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
	   STRNCPY: take 4 off the count; if any count is left, zero-fill
	   from %ecx = %edx + 4 (lea leaves the flags of sub untouched).
	   STPCPY: cmpb/sbb add 1 to %eax unless the byte at (%eax) is 0.  */
2216	.p2align 4
2217L(Exit4):
2218	movl	(%ecx), %eax
2219	movl	%eax, (%edx)
2220	SAVE_RESULT	(3)
2221# ifdef USE_AS_STRNCPY
2222	sub	$4, %ebx
2223	lea	4(%edx), %ecx
2224	jnz	L(StrncpyFillTailWithZero1)
2225#  ifdef USE_AS_STPCPY
2226	cmpb	$1, (%eax)
2227	sbb	$-1, %eax
2228#  endif
2229# endif
2230	RETURN1
2231
/*
 * ExitHigh4: entered from L(CopyFrom1To16BytesLess8) when bits 0-3 of %al
 * are clear.  Bits 4, 5, 6 select L(Exit5), L(Exit6), L(Exit7); otherwise
 * execution falls into L(Exit8).
 */
2232	.p2align 4
2233L(ExitHigh4):
2234	test	$0x10, %al
2235	jnz	L(Exit5)
2236	test	$0x20, %al
2237	jnz	L(Exit6)
2238	test	$0x40, %al
2239	jnz	L(Exit7)
2240
	/* Exit8: copy exactly 8 bytes with one 64-bit move through %xmm0.
	   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
	   STRNCPY: take 8 off the count; if any count is left, zero-fill
	   from %ecx = %edx + 8.
	   STPCPY: cmpb/sbb add 1 to %eax unless the byte at (%eax) is 0.  */
2241	.p2align 4
2242L(Exit8):
2243	movlpd	(%ecx), %xmm0
2244	movlpd	%xmm0, (%edx)
2245	SAVE_RESULT	(7)
2246# ifdef USE_AS_STRNCPY
2247	sub	$8, %ebx
2248	lea	8(%edx), %ecx
2249	jnz	L(StrncpyFillTailWithZero1)
2250#  ifdef USE_AS_STPCPY
2251	cmpb	$1, (%eax)
2252	sbb	$-1, %eax
2253#  endif
2254# endif
2255	RETURN1
2256
/*
 * ExitHigh8: entered from L(CopyFrom1To16Bytes) when %al is zero, i.e. the
 * lowest set mask bit is in %ah (bytes 8..15).  %al is reused as scratch
 * for the low nibble of %ah; an empty nibble means bytes 12..15.
 */
2257	.p2align 4
2258L(ExitHigh8):
2259	mov	%ah, %al
2260	and	$15, %al
2261	jz	L(ExitHigh12)
2262
2263	test	$0x01, %ah
2264	jnz	L(Exit9)
2265	test	$0x02, %ah
2266	jnz	L(Exit10)
2267	test	$0x04, %ah
2268	jnz	L(Exit11)
2269
	/* Exit12: copy exactly 12 bytes (one 8-byte plus one 4-byte move).
	   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
	   STRNCPY: take 12 off the count; if any count is left, zero-fill
	   from %ecx = %edx + 12.
	   STPCPY: cmpb/sbb add 1 to %eax unless the byte at (%eax) is 0.  */
2270	.p2align 4
2271L(Exit12):
2272	movlpd	(%ecx), %xmm0
2273	movl	8(%ecx), %eax
2274	movlpd	%xmm0, (%edx)
2275	movl	%eax, 8(%edx)
2276	SAVE_RESULT	(11)
2277# ifdef USE_AS_STRNCPY
2278	sub	$12, %ebx
2279	lea	12(%edx), %ecx
2280	jnz	L(StrncpyFillTailWithZero1)
2281#  ifdef USE_AS_STPCPY
2282	cmpb	$1, (%eax)
2283	sbb	$-1, %eax
2284#  endif
2285# endif
2286	RETURN1
2287
/*
 * ExitHigh12: entered from L(ExitHigh8) when bits 0-3 of %ah are clear.
 * Bits 4, 5, 6 of %ah select L(Exit13), L(Exit14), L(Exit15); otherwise
 * execution falls into L(Exit16).
 */
2288	.p2align 4
2289L(ExitHigh12):
2290	test	$0x10, %ah
2291	jnz	L(Exit13)
2292	test	$0x20, %ah
2293	jnz	L(Exit14)
2294	test	$0x40, %ah
2295	jnz	L(Exit15)
2296
	/* Exit16: copy exactly 16 bytes with unaligned 128-bit moves.
	   SAVE_RESULT and RETURN1 are macros defined outside this chunk.
	   STRNCPY: take 16 off the count; if any count is left, zero-fill
	   from %ecx = %edx + 16.
	   STPCPY: cmpb/sbb add 1 to %eax unless the byte at (%eax) is 0.  */
2297	.p2align 4
2298L(Exit16):
2299	movdqu	(%ecx), %xmm0
2300	movdqu	%xmm0, (%edx)
2301	SAVE_RESULT	(15)
2302# ifdef USE_AS_STRNCPY
2303	sub	$16, %ebx
2304	lea	16(%edx), %ecx
2305	jnz	L(StrncpyFillTailWithZero1)
2306#  ifdef USE_AS_STPCPY
2307	cmpb	$1, (%eax)
2308	sbb	$-1, %eax
2309#  endif
2310# endif
2311	RETURN1
2312
2313#  ifdef USE_AS_STRNCPY
2314
/*
 * CopyFrom1To16BytesCase2 (USE_AS_STRNCPY only): a match mask is present in
 * %eax, but the count in %ebx may run out first.  After adding 16 back to
 * %ebx and %esi to both cursors, the copy ends at whichever comes first:
 * the lowest set mask bit or the count.
 * CFI_PUSH and POP are macros defined outside this chunk; CFI_PUSH(%esi)
 * presumably tells the unwinder that %esi is still saved on the stack on
 * entry to this path (TODO confirm).
 */
2315	CFI_PUSH(%esi)
2316
2317	.p2align 4
2318L(CopyFrom1To16BytesCase2):
2319	add	$16, %ebx
2320	add	%esi, %ecx
2321	add	%esi, %edx
2322
2323	POP	(%esi)
2324
	/* No match in bytes 0..7: continue with the high half.  */
2325	test	%al, %al
2326	jz	L(ExitHighCase2)
2327
	/* Match in bytes 0..7 and more than 8 bytes of count left: the count
	   cannot end first, so the mask-only dispatcher is enough.  */
2328	cmp	$8, %ebx
2329	ja	L(CopyFrom1To16BytesLess8)
2330
	/* Otherwise walk upwards, checking mask bit k and count == k + 1 in
	   turn; the first hit decides how many bytes are copied.  */
2331	test	$0x01, %al
2332	jnz	L(Exit1)
2333	cmp	$1, %ebx
2334	je	L(Exit1)
2335	test	$0x02, %al
2336	jnz	L(Exit2)
2337	cmp	$2, %ebx
2338	je	L(Exit2)
2339	test	$0x04, %al
2340	jnz	L(Exit3)
2341	cmp	$3, %ebx
2342	je	L(Exit3)
2343	test	$0x08, %al
2344	jnz	L(Exit4)
2345	cmp	$4, %ebx
2346	je	L(Exit4)
2347	test	$0x10, %al
2348	jnz	L(Exit5)
2349	cmp	$5, %ebx
2350	je	L(Exit5)
2351	test	$0x20, %al
2352	jnz	L(Exit6)
2353	cmp	$6, %ebx
2354	je	L(Exit6)
2355	test	$0x40, %al
2356	jnz	L(Exit7)
2357	cmp	$7, %ebx
2358	je	L(Exit7)
2359	jmp	L(Exit8)
2360
/*
 * ExitHighCase2: continuation of L(CopyFrom1To16BytesCase2) when %al is
 * zero (no match in bytes 0..7).  If the count in %ebx is 8 or less it ends
 * before any match, so the count-only dispatcher
 * L(CopyFrom1To16BytesLess8Case3) is used.  Otherwise bit k of %ah and
 * count == k + 9 are checked in turn; L(Exit16) is the last resort.
 */
2361	.p2align 4
2362L(ExitHighCase2):
2363	cmp	$8, %ebx
2364	jbe	L(CopyFrom1To16BytesLess8Case3)
2365
2366	test	$0x01, %ah
2367	jnz	L(Exit9)
2368	cmp	$9, %ebx
2369	je	L(Exit9)
2370	test	$0x02, %ah
2371	jnz	L(Exit10)
2372	cmp	$10, %ebx
2373	je	L(Exit10)
2374	test	$0x04, %ah
2375	jnz	L(Exit11)
2376	cmp	$11, %ebx
2377	je	L(Exit11)
2378	test	$0x8, %ah
2379	jnz	L(Exit12)
2380	cmp	$12, %ebx
2381	je	L(Exit12)
2382	test	$0x10, %ah
2383	jnz	L(Exit13)
2384	cmp	$13, %ebx
2385	je	L(Exit13)
2386	test	$0x20, %ah
2387	jnz	L(Exit14)
2388	cmp	$14, %ebx
2389	je	L(Exit14)
2390	test	$0x40, %ah
2391	jnz	L(Exit15)
2392	cmp	$15, %ebx
2393	je	L(Exit15)
2394	jmp	L(Exit16)
2395
/*
 * CopyFrom1To16BytesCase2OrCase3 (USE_AS_STRNCPY only): chooses between
 * "case 2" (mask in %eax is non-zero) and "case 3" (no match, so only the
 * count in %ebx limits the copy).
 * CFI_PUSH, POP, SAVE_RESULT and RETURN1 are macros defined outside this
 * chunk; CFI_PUSH(%esi) presumably records that %esi is still saved on the
 * stack on entry (TODO confirm).
 */
2396	CFI_PUSH(%esi)
2397
2398	.p2align 4
2399L(CopyFrom1To16BytesCase2OrCase3):
2400	test	%eax, %eax
2401	jnz	L(CopyFrom1To16BytesCase2)
2402
	/* Case 3: add 16 back to the count, advance both cursors by %esi and
	   dispatch purely on the count.  */
2403	.p2align 4
2404L(CopyFrom1To16BytesCase3):
2405	add	$16, %ebx
2406	add	%esi, %edx
2407	add	%esi, %ecx
2408
2409	POP	(%esi)
2410
2411	cmp	$8, %ebx
2412	ja	L(ExitHigh8Case3)
2413
	/* Also a jump target of L(ExitHighCase2).  Counts 1..3 go to the
	   matching L(ExitK); counts above 4 go to L(ExitHigh4Case3); anything
	   else falls through to a plain 4-byte copy that returns without
	   touching %ebx again.  */
2414L(CopyFrom1To16BytesLess8Case3):
2415	cmp	$4, %ebx
2416	ja	L(ExitHigh4Case3)
2417
2418	cmp	$1, %ebx
2419	je	L(Exit1)
2420	cmp	$2, %ebx
2421	je	L(Exit2)
2422	cmp	$3, %ebx
2423	je	L(Exit3)
2424	movl	(%ecx), %eax
2425	movl	%eax, (%edx)
2426	SAVE_RESULT	(4)
2427	RETURN1
2428
/*
 * ExitHigh4Case3: count-only dispatch for counts above 4, entered from
 * L(CopyFrom1To16BytesLess8Case3).  Counts 5..7 go to the matching
 * L(ExitK); anything else falls through to an 8-byte copy followed by
 * SAVE_RESULT (8) and RETURN1 (macros defined outside this chunk).
 */
2429	.p2align 4
2430L(ExitHigh4Case3):
2431	cmp	$5, %ebx
2432	je	L(Exit5)
2433	cmp	$6, %ebx
2434	je	L(Exit6)
2435	cmp	$7, %ebx
2436	je	L(Exit7)
2437	movlpd	(%ecx), %xmm0
2438	movlpd	%xmm0, (%edx)
2439	SAVE_RESULT	(8)
2440	RETURN1
2441
/*
 * ExitHigh8Case3: count-only dispatch for counts above 8, entered from
 * L(CopyFrom1To16BytesCase3).  Counts above 12 go to L(ExitHigh12Case3),
 * counts 9..11 to the matching L(ExitK); anything else falls through to a
 * 12-byte copy (8 + 4) followed by SAVE_RESULT (12) and RETURN1 (macros
 * defined outside this chunk).
 */
2442	.p2align 4
2443L(ExitHigh8Case3):
2444	cmp	$12, %ebx
2445	ja	L(ExitHigh12Case3)
2446
2447	cmp	$9, %ebx
2448	je	L(Exit9)
2449	cmp	$10, %ebx
2450	je	L(Exit10)
2451	cmp	$11, %ebx
2452	je	L(Exit11)
2453	movlpd	(%ecx), %xmm0
2454	movl	8(%ecx), %eax
2455	movlpd	%xmm0, (%edx)
2456	movl	%eax, 8(%edx)
2457	SAVE_RESULT	(12)
2458	RETURN1
2459
/*
 * ExitHigh12Case3: count-only dispatch for counts above 12, entered from
 * L(ExitHigh8Case3).  Counts 13..15 go to the matching L(ExitK); anything
 * else falls through to a 16-byte copy (two 8-byte moves) followed by
 * SAVE_RESULT (16) and RETURN1 (macros defined outside this chunk).
 */
2460	.p2align 4
2461L(ExitHigh12Case3):
2462	cmp	$13, %ebx
2463	je	L(Exit13)
2464	cmp	$14, %ebx
2465	je	L(Exit14)
2466	cmp	$15, %ebx
2467	je	L(Exit15)
2468	movlpd	(%ecx), %xmm0
2469	movlpd	8(%ecx), %xmm1
2470	movlpd	%xmm0, (%edx)
2471	movlpd	%xmm1, 8(%edx)
2472	SAVE_RESULT	(16)
2473	RETURN1
2474
2475# endif
2476
/*
 * Exit1: final step that copies exactly 1 byte from (%ecx) to (%edx).
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 1 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 1 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2477	.p2align 4
2478L(Exit1):
2479	movb	(%ecx), %al
2480	movb	%al, (%edx)
2481	SAVE_RESULT	(0)
2482# ifdef USE_AS_STRNCPY
2483	sub	$1, %ebx
2484	lea	1(%edx), %ecx
2485	jnz	L(StrncpyFillTailWithZero1)
2486#  ifdef USE_AS_STPCPY
2487	cmpb	$1, (%eax)
2488	sbb	$-1, %eax
2489#  endif
2490# endif
2491	RETURN1
2492
/*
 * Exit2: final step that copies exactly 2 bytes from (%ecx) to (%edx) with
 * one 16-bit move.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 2 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 2 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2493	.p2align 4
2494L(Exit2):
2495	movw	(%ecx), %ax
2496	movw	%ax, (%edx)
2497	SAVE_RESULT	(1)
2498# ifdef USE_AS_STRNCPY
2499	sub	$2, %ebx
2500	lea	2(%edx), %ecx
2501	jnz	L(StrncpyFillTailWithZero1)
2502#  ifdef USE_AS_STPCPY
2503	cmpb	$1, (%eax)
2504	sbb	$-1, %eax
2505#  endif
2506# endif
2507	RETURN1
2508
/*
 * Exit3: final step that copies exactly 3 bytes from (%ecx) to (%edx): a
 * 16-bit move followed by a byte move at offset 2.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 3 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 3 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2509	.p2align 4
2510L(Exit3):
2511	movw	(%ecx), %ax
2512	movw	%ax, (%edx)
2513	movb	2(%ecx), %al
2514	movb	%al, 2(%edx)
2515	SAVE_RESULT	(2)
2516# ifdef USE_AS_STRNCPY
2517	sub	$3, %ebx
2518	lea	3(%edx), %ecx
2519	jnz	L(StrncpyFillTailWithZero1)
2520#  ifdef USE_AS_STPCPY
2521	cmpb	$1, (%eax)
2522	sbb	$-1, %eax
2523#  endif
2524# endif
2525	RETURN1
2526
/*
 * Exit5: final step that copies exactly 5 bytes from (%ecx) to (%edx): a
 * 32-bit move followed by a byte move at offset 4.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 5 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 5 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2527	.p2align 4
2528L(Exit5):
2529	movl	(%ecx), %eax
2530	movl	%eax, (%edx)
2531	movb	4(%ecx), %al
2532	movb	%al, 4(%edx)
2533	SAVE_RESULT	(4)
2534# ifdef USE_AS_STRNCPY
2535	sub	$5, %ebx
2536	lea	5(%edx), %ecx
2537	jnz	L(StrncpyFillTailWithZero1)
2538#  ifdef USE_AS_STPCPY
2539	cmpb	$1, (%eax)
2540	sbb	$-1, %eax
2541#  endif
2542# endif
2543	RETURN1
2544
/*
 * Exit6: final step that copies exactly 6 bytes from (%ecx) to (%edx): a
 * 32-bit move followed by a 16-bit move at offset 4.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 6 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 6 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2545	.p2align 4
2546L(Exit6):
2547	movl	(%ecx), %eax
2548	movl	%eax, (%edx)
2549	movw	4(%ecx), %ax
2550	movw	%ax, 4(%edx)
2551	SAVE_RESULT	(5)
2552# ifdef USE_AS_STRNCPY
2553	sub	$6, %ebx
2554	lea	6(%edx), %ecx
2555	jnz	L(StrncpyFillTailWithZero1)
2556#  ifdef USE_AS_STPCPY
2557	cmpb	$1, (%eax)
2558	sbb	$-1, %eax
2559#  endif
2560# endif
2561	RETURN1
2562
/*
 * Exit7: final step that copies exactly 7 bytes from (%ecx) to (%edx) with
 * two overlapping 32-bit moves (offsets 0 and 3).
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 7 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 7 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2563	.p2align 4
2564L(Exit7):
2565	movl	(%ecx), %eax
2566	movl	%eax, (%edx)
2567	movl	3(%ecx), %eax
2568	movl	%eax, 3(%edx)
2569	SAVE_RESULT	(6)
2570# ifdef USE_AS_STRNCPY
2571	sub	$7, %ebx
2572	lea	7(%edx), %ecx
2573	jnz	L(StrncpyFillTailWithZero1)
2574#  ifdef USE_AS_STPCPY
2575	cmpb	$1, (%eax)
2576	sbb	$-1, %eax
2577#  endif
2578# endif
2579	RETURN1
2580
/*
 * Exit9: final step that copies exactly 9 bytes from (%ecx) to (%edx): a
 * 64-bit move through %xmm0 plus a byte move at offset 8.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 9 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 9 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2581	.p2align 4
2582L(Exit9):
2583	movlpd	(%ecx), %xmm0
2584	movb	8(%ecx), %al
2585	movlpd	%xmm0, (%edx)
2586	movb	%al, 8(%edx)
2587	SAVE_RESULT	(8)
2588# ifdef USE_AS_STRNCPY
2589	sub	$9, %ebx
2590	lea	9(%edx), %ecx
2591	jnz	L(StrncpyFillTailWithZero1)
2592#  ifdef USE_AS_STPCPY
2593	cmpb	$1, (%eax)
2594	sbb	$-1, %eax
2595#  endif
2596# endif
2597	RETURN1
2598
/*
 * Exit10: final step that copies exactly 10 bytes from (%ecx) to (%edx): a
 * 64-bit move through %xmm0 plus a 16-bit move at offset 8.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 10 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 10 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2599	.p2align 4
2600L(Exit10):
2601	movlpd	(%ecx), %xmm0
2602	movw	8(%ecx), %ax
2603	movlpd	%xmm0, (%edx)
2604	movw	%ax, 8(%edx)
2605	SAVE_RESULT	(9)
2606# ifdef USE_AS_STRNCPY
2607	sub	$10, %ebx
2608	lea	10(%edx), %ecx
2609	jnz	L(StrncpyFillTailWithZero1)
2610#  ifdef USE_AS_STPCPY
2611	cmpb	$1, (%eax)
2612	sbb	$-1, %eax
2613#  endif
2614# endif
2615	RETURN1
2616
/*
 * Exit11: final step that copies exactly 11 bytes from (%ecx) to (%edx): a
 * 64-bit move through %xmm0 plus an overlapping 32-bit move at offset 7.
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 11 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 11 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2617	.p2align 4
2618L(Exit11):
2619	movlpd	(%ecx), %xmm0
2620	movl	7(%ecx), %eax
2621	movlpd	%xmm0, (%edx)
2622	movl	%eax, 7(%edx)
2623	SAVE_RESULT	(10)
2624# ifdef USE_AS_STRNCPY
2625	sub	$11, %ebx
2626	lea	11(%edx), %ecx
2627	jnz	L(StrncpyFillTailWithZero1)
2628#  ifdef USE_AS_STPCPY
2629	cmpb	$1, (%eax)
2630	sbb	$-1, %eax
2631#  endif
2632# endif
2633	RETURN1
2634
/*
 * Exit13: final step that copies exactly 13 bytes from (%ecx) to (%edx)
 * with two overlapping 64-bit moves (offsets 0 and 5).
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 13 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 13 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2635	.p2align 4
2636L(Exit13):
2637	movlpd	(%ecx), %xmm0
2638	movlpd	5(%ecx), %xmm1
2639	movlpd	%xmm0, (%edx)
2640	movlpd	%xmm1, 5(%edx)
2641	SAVE_RESULT	(12)
2642# ifdef USE_AS_STRNCPY
2643	sub	$13, %ebx
2644	lea	13(%edx), %ecx
2645	jnz	L(StrncpyFillTailWithZero1)
2646#  ifdef USE_AS_STPCPY
2647	cmpb	$1, (%eax)
2648	sbb	$-1, %eax
2649#  endif
2650# endif
2651	RETURN1
2652
/*
 * Exit14: final step that copies exactly 14 bytes from (%ecx) to (%edx)
 * with two overlapping 64-bit moves (offsets 0 and 6).
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 14 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 14 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2653	.p2align 4
2654L(Exit14):
2655	movlpd	(%ecx), %xmm0
2656	movlpd	6(%ecx), %xmm1
2657	movlpd	%xmm0, (%edx)
2658	movlpd	%xmm1, 6(%edx)
2659	SAVE_RESULT	(13)
2660# ifdef USE_AS_STRNCPY
2661	sub	$14, %ebx
2662	lea	14(%edx), %ecx
2663	jnz	L(StrncpyFillTailWithZero1)
2664#  ifdef USE_AS_STPCPY
2665	cmpb	$1, (%eax)
2666	sbb	$-1, %eax
2667#  endif
2668# endif
2669	RETURN1
2670
/*
 * Exit15: final step that copies exactly 15 bytes from (%ecx) to (%edx)
 * with two overlapping 64-bit moves (offsets 0 and 7).
 * SAVE_RESULT and RETURN1 are macros defined outside this chunk.
 * USE_AS_STRNCPY: 15 is subtracted from the count in %ebx; if the result is
 * non-zero, control goes to L(StrncpyFillTailWithZero1) with
 * %ecx = %edx + 15 (lea does not disturb the flags set by sub).
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2671	.p2align 4
2672L(Exit15):
2673	movlpd	(%ecx), %xmm0
2674	movlpd	7(%ecx), %xmm1
2675	movlpd	%xmm0, (%edx)
2676	movlpd	%xmm1, 7(%edx)
2677	SAVE_RESULT	(14)
2678# ifdef USE_AS_STRNCPY
2679	sub	$15, %ebx
2680	lea	15(%edx), %ecx
2681	jnz	L(StrncpyFillTailWithZero1)
2682#  ifdef USE_AS_STPCPY
2683	cmpb	$1, (%eax)
2684	sbb	$-1, %eax
2685#  endif
2686# endif
2687	RETURN1
2688
2689CFI_POP	(%edi)
2690
2691# ifdef USE_AS_STRNCPY
/*
 * Fill0 .. Fill16 (USE_AS_STRNCPY only): jump targets selected by
 * L(FillFrom1To16Bytes).  L(FillN) stores exactly N bytes at (%ecx) and
 * then executes RETURN (a macro defined outside this chunk).
 * The stored values come from %dl/%dx/%edx and the low half of %xmm0, so
 * those registers must be zero on entry; L(StrncpyFillTailWithZero) clears
 * them with xor/pxor before dispatching here.
 * Sizes that are not a single move width use two moves, overlapping where
 * necessary (e.g. Fill7 = 4 bytes at offset 0 plus 4 bytes at offset 3).
 */
2692	.p2align 4
2693L(Fill0):
2694	RETURN
2695
2696	.p2align 4
2697L(Fill1):
2698	movb	%dl, (%ecx)
2699	RETURN
2700
2701	.p2align 4
2702L(Fill2):
2703	movw	%dx, (%ecx)
2704	RETURN
2705
2706	.p2align 4
2707L(Fill3):
2708	movw	%dx, (%ecx)
2709	movb	%dl, 2(%ecx)
2710	RETURN
2711
2712	.p2align 4
2713L(Fill4):
2714	movl	%edx, (%ecx)
2715	RETURN
2716
2717	.p2align 4
2718L(Fill5):
2719	movl	%edx, (%ecx)
2720	movb	%dl, 4(%ecx)
2721	RETURN
2722
2723	.p2align 4
2724L(Fill6):
2725	movl	%edx, (%ecx)
2726	movw	%dx, 4(%ecx)
2727	RETURN
2728
	/* 4 + 4 bytes, overlapping at offset 3.  */
2729	.p2align 4
2730L(Fill7):
2731	movl	%edx, (%ecx)
2732	movl	%edx, 3(%ecx)
2733	RETURN
2734
2735	.p2align 4
2736L(Fill8):
2737	movlpd	%xmm0, (%ecx)
2738	RETURN
2739
2740	.p2align 4
2741L(Fill9):
2742	movlpd	%xmm0, (%ecx)
2743	movb	%dl, 8(%ecx)
2744	RETURN
2745
2746	.p2align 4
2747L(Fill10):
2748	movlpd	%xmm0, (%ecx)
2749	movw	%dx, 8(%ecx)
2750	RETURN
2751
	/* 8 + 4 bytes, overlapping at offset 7.  */
2752	.p2align 4
2753L(Fill11):
2754	movlpd	%xmm0, (%ecx)
2755	movl	%edx, 7(%ecx)
2756	RETURN
2757
2758	.p2align 4
2759L(Fill12):
2760	movlpd	%xmm0, (%ecx)
2761	movl	%edx, 8(%ecx)
2762	RETURN
2763
	/* Fill13..Fill15: two 8-byte stores, the second one overlapping.  */
2764	.p2align 4
2765L(Fill13):
2766	movlpd	%xmm0, (%ecx)
2767	movlpd	%xmm0, 5(%ecx)
2768	RETURN
2769
2770	.p2align 4
2771L(Fill14):
2772	movlpd	%xmm0, (%ecx)
2773	movlpd	%xmm0, 6(%ecx)
2774	RETURN
2775
2776	.p2align 4
2777L(Fill15):
2778	movlpd	%xmm0, (%ecx)
2779	movlpd	%xmm0, 7(%ecx)
2780	RETURN
2781
2782	.p2align 4
2783L(Fill16):
2784	movlpd	%xmm0, (%ecx)
2785	movlpd	%xmm0, 8(%ecx)
2786	RETURN
2787
/*
 * StrncpyFillExit1 / FillFrom1To16Bytes (USE_AS_STRNCPY only): pick the
 * L(FillN) routine for the final N = %ebx bytes of zero padding.
 *   L(StrncpyFillExit1)    first adds 16 back to %ebx (the callers in this
 *                          file reach it right after a subtraction of 16
 *                          went to or below zero), then falls through
 *   L(FillFrom1To16Bytes)  dispatches on %ebx as it is
 * The comparisons form a small decision tree over 0..16; each cmp is
 * followed by up to three jumps that reuse its flags.  Signed conditions
 * (jl/jg) are used.  Every path ends in a jump, so nothing falls through
 * from one sub-tree label into the next.
 */
2788	.p2align 4
2789L(StrncpyFillExit1):
2790	lea	16(%ebx), %ebx
2791L(FillFrom1To16Bytes):
2792	test	%ebx, %ebx
2793	jz	L(Fill0)
2794	cmp	$16, %ebx
2795	je	L(Fill16)
2796	cmp	$8, %ebx
2797	je	L(Fill8)
2798	jg	L(FillMore8)
2799	cmp	$4, %ebx
2800	je	L(Fill4)
2801	jg	L(FillMore4)
	/* Here %ebx is below 4 (and not 0).  */
2802	cmp	$2, %ebx
2803	jl	L(Fill1)
2804	je	L(Fill2)
2805	jg	L(Fill3)
2806L(FillMore8):	/* but less than 16 */
2807	cmp	$12, %ebx
2808	je	L(Fill12)
2809	jl	L(FillLess12)
2810	cmp	$14, %ebx
2811	jl	L(Fill13)
2812	je	L(Fill14)
2813	jg	L(Fill15)
2814L(FillMore4):	/* but less than 8 */
2815	cmp	$6, %ebx
2816	jl	L(Fill5)
2817	je	L(Fill6)
2818	jg	L(Fill7)
2819L(FillLess12):	/* but more than 8 */
2820	cmp	$10, %ebx
2821	jl	L(Fill9)
2822	je	L(Fill10)
2823	jmp	L(Fill11)
2824
/*
 * StrncpyFillTailWithZero (USE_AS_STRNCPY only): write %ebx zero bytes
 * starting at %ecx.
 *   L(StrncpyFillTailWithZero1)  same, but executes POP (%edi) first
 *   L(StrncpyFillTailWithZero)   entry without the POP
 * CFI_PUSH and POP are macros defined outside this chunk; CFI_PUSH(%edi)
 * presumably records that %edi is still saved on the stack on entry to the
 * "1" variant (TODO confirm).
 * %xmm0 and %edx are cleared here and serve as the zero source both below
 * and in the L(FillN) routines.
 */
2825	CFI_PUSH(%edi)
2826
2827	.p2align 4
2828L(StrncpyFillTailWithZero1):
2829	POP	(%edi)
2830L(StrncpyFillTailWithZero):
2831	pxor	%xmm0, %xmm0
2832	xor	%edx, %edx
	/* 16 bytes or fewer: L(StrncpyFillExit1) adds the 16 back and
	   dispatches on the exact size.  */
2833	sub	$16, %ebx
2834	jbe	L(StrncpyFillExit1)
2835
	/* More than 16: zero the first 16 bytes with unaligned-safe 8-byte
	   stores.  */
2836	movlpd	%xmm0, (%ecx)
2837	movlpd	%xmm0, 8(%ecx)
2838
2839	lea	16(%ecx), %ecx
2840
	/* Round %ecx down to a 16-byte boundary for the movdqa stores below
	   and add the skipped-back distance to the count again; those bytes
	   are simply zeroed twice.  %edx is used as scratch and re-zeroed.  */
2841	mov	%ecx, %edx
2842	and	$0xf, %edx
2843	sub	%edx, %ecx
2844	add	%edx, %ebx
2845	xor	%edx, %edx
2846	sub	$64, %ebx
2847	jb	L(StrncpyFillLess64)
2848
	/* 64 bytes per pass with aligned stores while at least 64 remain.  */
2849L(StrncpyFillLoopMovdqa):
2850	movdqa	%xmm0, (%ecx)
2851	movdqa	%xmm0, 16(%ecx)
2852	movdqa	%xmm0, 32(%ecx)
2853	movdqa	%xmm0, 48(%ecx)
2854	lea	64(%ecx), %ecx
2855	sub	$64, %ebx
2856	jae	L(StrncpyFillLoopMovdqa)
2857
	/* Here %ebx = remaining - 64 (negative).  After "add $32" it is
	   remaining - 32: negative means fewer than 32 bytes are left.
	   Otherwise 32 bytes are stored, and a further 16 if "sub $16" does
	   not go negative; the final 0..15 bytes go through the dispatcher
	   (via L(StrncpyFillExit1) when the last sub has to be undone).  */
2858L(StrncpyFillLess64):
2859	add	$32, %ebx
2860	jl	L(StrncpyFillLess32)
2861	movdqa	%xmm0, (%ecx)
2862	movdqa	%xmm0, 16(%ecx)
2863	lea	32(%ecx), %ecx
2864	sub	$16, %ebx
2865	jl	L(StrncpyFillExit1)
2866	movdqa	%xmm0, (%ecx)
2867	lea	16(%ecx), %ecx
2868	jmp	L(FillFrom1To16Bytes)
2869
	/* Fewer than 32 bytes left: after "add $16" %ebx is remaining - 16.
	   Negative means fewer than 16 are left (L(StrncpyFillExit1) adds
	   the 16 back); otherwise store 16 and dispatch on the rest.  */
2870L(StrncpyFillLess32):
2871	add	$16, %ebx
2872	jl	L(StrncpyFillExit1)
2873	movdqa	%xmm0, (%ecx)
2874	lea	16(%ecx), %ecx
2875	jmp	L(FillFrom1To16Bytes)
2876# endif
2877
/*
 * ExitTail1: copy exactly 1 byte from (%ecx) to (%edx) and return.
 * Same shape as L(Exit1), but built from SAVE_RESULT_TAIL / RETURN (macros
 * defined outside this chunk) and the L(StrncpyFillTailWithZero) entry,
 * i.e. the one that does not execute POP (%edi).
 * USE_AS_STRNCPY: 1 is subtracted from the count in %ebx; if the result is
 * non-zero, the remainder is zero-filled starting at %ecx = %edx + 1.
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2878	.p2align 4
2879L(ExitTail1):
2880	movb	(%ecx), %al
2881	movb	%al, (%edx)
2882	SAVE_RESULT_TAIL (0)
2883# ifdef USE_AS_STRNCPY
2884	sub	$1, %ebx
2885	lea	1(%edx), %ecx
2886	jnz	L(StrncpyFillTailWithZero)
2887#  ifdef USE_AS_STPCPY
2888	cmpb	$1, (%eax)
2889	sbb	$-1, %eax
2890#  endif
2891# endif
2892	RETURN
2893
/*
 * ExitTail2: copy exactly 2 bytes from (%ecx) to (%edx) with one 16-bit
 * move and return.
 * Same shape as L(Exit2), but built from SAVE_RESULT_TAIL / RETURN (macros
 * defined outside this chunk) and the L(StrncpyFillTailWithZero) entry,
 * i.e. the one that does not execute POP (%edi).
 * USE_AS_STRNCPY: 2 is subtracted from the count in %ebx; if the result is
 * non-zero, the remainder is zero-filled starting at %ecx = %edx + 2.
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2894	.p2align 4
2895L(ExitTail2):
2896	movw	(%ecx), %ax
2897	movw	%ax, (%edx)
2898	SAVE_RESULT_TAIL (1)
2899# ifdef USE_AS_STRNCPY
2900	sub	$2, %ebx
2901	lea	2(%edx), %ecx
2902	jnz	L(StrncpyFillTailWithZero)
2903#  ifdef USE_AS_STPCPY
2904	cmpb	$1, (%eax)
2905	sbb	$-1, %eax
2906#  endif
2907# endif
2908	RETURN
2909
/*
 * ExitTail3: copy exactly 3 bytes from (%ecx) to (%edx) (16-bit move plus
 * a byte move at offset 2) and return.
 * Same shape as L(Exit3), but built from SAVE_RESULT_TAIL / RETURN (macros
 * defined outside this chunk) and the L(StrncpyFillTailWithZero) entry,
 * i.e. the one that does not execute POP (%edi).
 * USE_AS_STRNCPY: 3 is subtracted from the count in %ebx; if the result is
 * non-zero, the remainder is zero-filled starting at %ecx = %edx + 3.
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2910	.p2align 4
2911L(ExitTail3):
2912	movw	(%ecx), %ax
2913	movw	%ax, (%edx)
2914	movb	2(%ecx), %al
2915	movb	%al, 2(%edx)
2916	SAVE_RESULT_TAIL (2)
2917# ifdef USE_AS_STRNCPY
2918	sub	$3, %ebx
2919	lea	3(%edx), %ecx
2920	jnz	L(StrncpyFillTailWithZero)
2921#  ifdef USE_AS_STPCPY
2922	cmpb	$1, (%eax)
2923	sbb	$-1, %eax
2924#  endif
2925# endif
2926	RETURN
2927
/*
 * ExitTail4: copy exactly 4 bytes from (%ecx) to (%edx) with one 32-bit
 * move and return.
 * Same shape as L(Exit4), but built from SAVE_RESULT_TAIL / RETURN (macros
 * defined outside this chunk) and the L(StrncpyFillTailWithZero) entry,
 * i.e. the one that does not execute POP (%edi).
 * USE_AS_STRNCPY: 4 is subtracted from the count in %ebx; if the result is
 * non-zero, the remainder is zero-filled starting at %ecx = %edx + 4.
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2928	.p2align 4
2929L(ExitTail4):
2930	movl	(%ecx), %eax
2931	movl	%eax, (%edx)
2932	SAVE_RESULT_TAIL (3)
2933# ifdef USE_AS_STRNCPY
2934	sub	$4, %ebx
2935	lea	4(%edx), %ecx
2936	jnz	L(StrncpyFillTailWithZero)
2937#  ifdef USE_AS_STPCPY
2938	cmpb	$1, (%eax)
2939	sbb	$-1, %eax
2940#  endif
2941# endif
2942	RETURN
2943
/*
 * ExitTail5: copy exactly 5 bytes from (%ecx) to (%edx) (32-bit move plus
 * a byte move at offset 4) and return.
 * Same shape as L(Exit5), but built from SAVE_RESULT_TAIL / RETURN (macros
 * defined outside this chunk) and the L(StrncpyFillTailWithZero) entry,
 * i.e. the one that does not execute POP (%edi).
 * USE_AS_STRNCPY: 5 is subtracted from the count in %ebx; if the result is
 * non-zero, the remainder is zero-filled starting at %ecx = %edx + 5.
 * USE_AS_STPCPY (inside STRNCPY): cmpb/sbb add 1 to %eax unless the byte
 * at (%eax) is zero.
 */
2944	.p2align 4
2945L(ExitTail5):
2946	movl	(%ecx), %eax
2947	movl	%eax, (%edx)
2948	movb	4(%ecx), %al
2949	movb	%al, 4(%edx)
2950	SAVE_RESULT_TAIL (4)
2951# ifdef USE_AS_STRNCPY
2952	sub	$5, %ebx
2953	lea	5(%edx), %ecx
2954	jnz	L(StrncpyFillTailWithZero)
2955#  ifdef USE_AS_STPCPY
2956	cmpb	$1, (%eax)
2957	sbb	$-1, %eax
2958#  endif
2959# endif
2960	RETURN
2961
/* Tail exit for 6 bytes: a 32-bit move plus a 16-bit move at offset 4, then
   SAVE_RESULT_TAIL (defined earlier in the file; presumably loads the return
   value into %eax) and return.  strncpy builds first take 6 off %ebx and hand
   a non-zero remainder to L(StrncpyFillTailWithZero) with %ecx = %edx + 6.  */
	.p2align 4
L(ExitTail6):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movw	4(%ecx), %ax
	movw	%ax, 4(%edx)
	SAVE_RESULT_TAIL (5)
# ifdef USE_AS_STRNCPY
	sub	$6, %ebx
	lea	6(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
2979
/* Tail exit for 7 bytes: two 32-bit moves at offsets 0 and 3 that overlap by
   one byte, then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 7
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 7.  */
	.p2align 4
L(ExitTail7):
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	movl	3(%ecx), %eax
	movl	%eax, 3(%edx)
	SAVE_RESULT_TAIL (6)
# ifdef USE_AS_STRNCPY
	sub	$7, %ebx
	lea	7(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
2997
/* Tail exit for 8 bytes: one 64-bit move through %xmm0, then SAVE_RESULT_TAIL
   (defined earlier in the file; presumably loads the return value into %eax)
   and return.  strncpy builds first take 8 off %ebx and hand a non-zero
   remainder to L(StrncpyFillTailWithZero) with %ecx = %edx + 8.
   NOTE(review): unlike most sibling ExitTail labels there is no
   USE_AS_STPCPY adjustment of %eax here; presumably this label is only
   reached when byte 7 is the terminator -- confirm at the jump sites.  */
	.p2align 4
L(ExitTail8):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	SAVE_RESULT_TAIL (7)
# ifdef USE_AS_STRNCPY
	sub	$8, %ebx
	lea	8(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
# endif
	RETURN
3009
/* Tail exit for 9 bytes: a 64-bit move through %xmm0 plus a byte move at
   offset 8, then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 9
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 9.  */
	.p2align 4
L(ExitTail9):
	movlpd	(%ecx), %xmm0
	movb	8(%ecx), %al
	movlpd	%xmm0, (%edx)
	movb	%al, 8(%edx)
	SAVE_RESULT_TAIL (8)
# ifdef USE_AS_STRNCPY
	sub	$9, %ebx
	lea	9(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3027
/* Tail exit for 10 bytes: a 64-bit move through %xmm0 plus a 16-bit move at
   offset 8, then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 10
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 10.  */
	.p2align 4
L(ExitTail10):
	movlpd	(%ecx), %xmm0
	movw	8(%ecx), %ax
	movlpd	%xmm0, (%edx)
	movw	%ax, 8(%edx)
	SAVE_RESULT_TAIL (9)
# ifdef USE_AS_STRNCPY
	sub	$10, %ebx
	lea	10(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3045
/* Tail exit for 11 bytes: a 64-bit move through %xmm0 plus a 32-bit move at
   offset 7 (overlapping by one byte), then SAVE_RESULT_TAIL (defined earlier
   in the file; presumably loads the return value into %eax) and return.
   strncpy builds first take 11 off %ebx and hand a non-zero remainder to
   L(StrncpyFillTailWithZero) with %ecx = %edx + 11.  */
	.p2align 4
L(ExitTail11):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 7(%edx)
	SAVE_RESULT_TAIL (10)
# ifdef USE_AS_STRNCPY
	sub	$11, %ebx
	lea	11(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3063
/* Tail exit for 12 bytes: a 64-bit move through %xmm0 plus a 32-bit move at
   offset 8, then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 12
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 12.  */
	.p2align 4
L(ExitTail12):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
# ifdef USE_AS_STRNCPY
	sub	$12, %ebx
	lea	12(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3081
/* Tail exit for 13 bytes: two 64-bit moves at offsets 0 and 5 (overlapping by
   three bytes), then SAVE_RESULT_TAIL (defined earlier in the file;
   presumably loads the return value into %eax) and return.  strncpy builds
   first take 13 off %ebx and hand a non-zero remainder to
   L(StrncpyFillTailWithZero) with %ecx = %edx + 13.  */
	.p2align 4
L(ExitTail13):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	SAVE_RESULT_TAIL (12)
# ifdef USE_AS_STRNCPY
	sub	$13, %ebx
	lea	13(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3099
/* Tail exit for 14 bytes: two 64-bit moves at offsets 0 and 6 (overlapping by
   two bytes), then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 14
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 14.  */
	.p2align 4
L(ExitTail14):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	SAVE_RESULT_TAIL (13)
# ifdef USE_AS_STRNCPY
	sub	$14, %ebx
	lea	14(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3117
/* Tail exit for 15 bytes: two 64-bit moves at offsets 0 and 7 (overlapping by
   one byte), then SAVE_RESULT_TAIL (defined earlier in the file; presumably
   loads the return value into %eax) and return.  strncpy builds first take 15
   off %ebx and hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 15.
   NOTE(review): unlike most sibling ExitTail labels there is no
   USE_AS_STPCPY adjustment of %eax here; presumably this label is only
   reached when byte 14 is the terminator -- confirm at the jump sites.  */
	.p2align 4
L(ExitTail15):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	SAVE_RESULT_TAIL (14)
# ifdef USE_AS_STRNCPY
	sub	$15, %ebx
	lea	15(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
# endif
	RETURN
3131
/* Tail exit for 16 bytes: one unaligned 128-bit move through %xmm0, then
   SAVE_RESULT_TAIL (defined earlier in the file; presumably loads the return
   value into %eax) and return.  strncpy builds first take 16 off %ebx and
   hand a non-zero remainder to L(StrncpyFillTailWithZero) with
   %ecx = %edx + 16.  */
	.p2align 4
L(ExitTail16):
	movdqu	(%ecx), %xmm0
	movdqu	%xmm0, (%edx)
	SAVE_RESULT_TAIL (15)
# ifdef USE_AS_STRNCPY
	sub	$16, %ebx
	lea	16(%edx), %ecx		/* lea leaves the flags of sub intact */
	jnz	L(StrncpyFillTailWithZero)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
# endif
	RETURN
3147#endif
3148
3149#ifdef USE_AS_STRNCPY
# if !defined(USE_AS_STRCAT) && !defined(USE_AS_STRLCPY)
	/* CFI_PUSH is defined earlier in the file; presumably it only
	   records unwind information saying %esi and %edi are saved on the
	   stack for the code that follows -- confirm at its definition.  */
	CFI_PUSH (%esi)
	CFI_PUSH (%edi)
# endif
/* Count-limited exit from the 64-byte aligned loop.  A non-zero %eax selects
   L(Aligned64LeaveCase2); zero falls into L(Aligned64LeaveCase3), which
   stores %xmm4, %xmm5 and %xmm6 at -64, -48 and -32(%edx) for as long as
   %ebx (re-biased by +48 first) stays positive, adding 16 to %esi per chunk,
   and then continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably the zero-byte mask of the current 64-byte
   group and %xmm4-%xmm6 its first three chunks -- confirm in the loop.  */
	.p2align 4
L(StrncpyLeaveCase2OrCase3):
	test	%eax, %eax
	jnz	L(Aligned64LeaveCase2)

L(Aligned64LeaveCase3):
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase3)
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase3)
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx		/* lea: no flags needed before the jmp */
	jmp	L(CopyFrom1To16BytesCase3)
3174
/* Exit from the 64-byte aligned loop when %eax was non-zero on entry to
   L(StrncpyLeaveCase2OrCase3).  Chunks %xmm4..%xmm7 are examined in order:
   each is compared bytewise with %xmm0 and the resulting byte mask is put in
   %eax.  If %ebx runs out first the code leaves through
   L(CopyFrom1To16BytesCase2OrCase3) (or ...Case2 after the last chunk); if
   the mask is non-zero it leaves through L(CopyFrom1To16Bytes).  A chunk is
   stored to the destination only once it has been passed over.
   Whenever execution falls through a "test/jnz" pair the mask was zero, so
   %xmm0 (the pcmpeqb result) is all-zero again for the next comparison.
   NOTE(review): this relies on %xmm0 being all-zero on entry -- confirm.  */
L(Aligned64LeaveCase2):
	pcmpeqb	%xmm4, %xmm0
	pmovmskb %xmm0, %eax
	add	$48, %ebx
	jle	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm5, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm4, -64(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm6, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm5, -48(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(CopyFrom1To16BytesCase2OrCase3)
	test	%eax, %eax
	jnz	L(CopyFrom1To16Bytes)

	pcmpeqb	%xmm7, %xmm0
	pmovmskb %xmm0, %eax
	movaps	%xmm6, -32(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
	jmp	L(CopyFrom1To16BytesCase2)
3207
/*--------------------------------------------------*/
/* Exit stub 1: copy 15 bytes from (%ecx) to (%edx) with two 8-byte moves at
   offsets 0 and 7, set %esi = 15, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit1Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
	mov	$15, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3219
/* Exit stub 2: copy 14 bytes from (%ecx) to (%edx) with two 8-byte moves at
   offsets 0 and 6, set %esi = 14, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit2Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	6(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 6(%edx)
	mov	$14, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3230
/* Exit stub 3: copy 13 bytes from (%ecx) to (%edx) with two 8-byte moves at
   offsets 0 and 5, set %esi = 13, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit3Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	5(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 5(%edx)
	mov	$13, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3241
/* Exit stub 4: copy 12 bytes from (%ecx) to (%edx) (8 bytes through %xmm0,
   4 bytes at offset 8 through %esi as scratch), set %esi = 12, then dispatch
   on %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit4Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %esi
	movlpd	%xmm0, (%edx)
	movl	%esi, 8(%edx)
	mov	$12, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3252
/* Exit stub 5: copy 11 bytes from (%ecx) to (%edx) (8 bytes through %xmm0,
   4 bytes at offset 7 through %esi as scratch), set %esi = 11, then dispatch
   on %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit5Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	7(%ecx), %esi
	movlpd	%xmm0, (%edx)
	movl	%esi, 7(%edx)
	mov	$11, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3263
/* Exit stub 6: copy 10 bytes from (%ecx) to (%edx) (8 bytes through %xmm0,
   4 bytes at offset 6 through %esi as scratch), set %esi = 10, then dispatch
   on %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit6Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	6(%ecx), %esi
	movlpd	%xmm0, (%edx)
	movl	%esi, 6(%edx)
	mov	$10, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3274
/* Exit stub 7: copy 9 bytes from (%ecx) to (%edx) (8 bytes through %xmm0,
   4 bytes at offset 5 through %esi as scratch), set %esi = 9, then dispatch
   on %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit7Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movl	5(%ecx), %esi
	movlpd	%xmm0, (%edx)
	movl	%esi, 5(%edx)
	mov	$9, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3285
/* Exit stub 8: copy 8 bytes from (%ecx) to (%edx) through %xmm0, set
   %esi = 8, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit8Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	mov	$8, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3294
/* Exit stub 9: copy 8 bytes from (%ecx) to (%edx) through %xmm0 and set
   %esi = 7 (one byte more is moved than %esi accounts for), then dispatch on
   %eax: non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit9Case2OrCase3):
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
	mov	$7, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3303
/* Exit stub 10: copy the 8 bytes starting one byte before %ecx / %edx
   (offsets -1..6) through %xmm0, set %esi = 6, then dispatch on %eax:
   non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller, and
   the byte at offset -1 is presumably already-copied data -- confirm.  */
	.p2align 4
L(StrncpyExit10Case2OrCase3):
	movlpd	-1(%ecx), %xmm0
	movlpd	%xmm0, -1(%edx)
	mov	$6, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3312
/* Exit stub 11: copy the 8 bytes starting two bytes before %ecx / %edx
   (offsets -2..5) through %xmm0, set %esi = 5, then dispatch on %eax:
   non-zero goes to L(CopyFrom1To16BytesCase2), zero to
   L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller, and
   the bytes at negative offsets presumably already-copied data -- confirm.  */
	.p2align 4
L(StrncpyExit11Case2OrCase3):
	movlpd	-2(%ecx), %xmm0
	movlpd	%xmm0, -2(%edx)
	mov	$5, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3321
/* Exit stub 12: copy 4 bytes from (%ecx) to (%edx) using %esi as scratch,
   set %esi = 4, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller.  */
	.p2align 4
L(StrncpyExit12Case2OrCase3):
	movl	(%ecx), %esi
	movl	%esi, (%edx)
	mov	$4, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3330
/* Exit stub 13: copy the 4 bytes at offsets -1..2 from %ecx to %edx using
   %esi as scratch, set %esi = 3, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller, and
   the byte at offset -1 is presumably already-copied data -- confirm.  */
	.p2align 4
L(StrncpyExit13Case2OrCase3):
	movl	-1(%ecx), %esi
	movl	%esi, -1(%edx)
	mov	$3, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3339
/* Exit stub 14: copy the 4 bytes at offsets -2..1 from %ecx to %edx using
   %esi as scratch, set %esi = 2, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller, and
   the bytes at negative offsets presumably already-copied data -- confirm.  */
	.p2align 4
L(StrncpyExit14Case2OrCase3):
	movl	-2(%ecx), %esi
	movl	%esi, -2(%edx)
	mov	$2, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3348
/* Exit stub 15: copy the 4 bytes at offsets -3..0 from %ecx to %edx using
   %esi as scratch, set %esi = 1, then dispatch on %eax: non-zero goes to
   L(CopyFrom1To16BytesCase2), zero to L(CopyFrom1To16BytesCase3).
   NOTE(review): %eax is presumably a zero-byte mask set by the caller, and
   the bytes at negative offsets presumably already-copied data -- confirm.  */
	.p2align 4
L(StrncpyExit15Case2OrCase3):
	movl	-3(%ecx), %esi
	movl	%esi, -3(%edx)
	mov	$1, %esi
	test	%eax, %eax
	jnz	L(CopyFrom1To16BytesCase2)
	jmp	L(CopyFrom1To16BytesCase3)
3357
/* Count-limited exit for the palignr $1 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit1), which
   advances both pointers by %esi + 15, copies the 16 bytes ending there,
   clears %esi and continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 31(%ecx) needs %ecx + 31 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave1):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit1)
	palignr	$1, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	31(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	palignr	$1, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit1)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit1):
	lea	15(%edx, %esi), %edx
	lea	15(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3387
/* Count-limited exit for the palignr $2 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit2), which
   advances both pointers by %esi + 14, copies the 16 bytes ending there,
   clears %esi and continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 30(%ecx) needs %ecx + 30 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave2):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit2)
	palignr	$2, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	30(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	palignr	$2, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit2)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit2):
	lea	14(%edx, %esi), %edx
	lea	14(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3417
/* Count-limited exit for the palignr $3 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit3), which
   advances both pointers by %esi + 13, copies the 16 bytes ending there,
   clears %esi and continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 29(%ecx) needs %ecx + 29 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave3):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit3)
	palignr	$3, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	29(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	palignr	$3, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit3)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit3):
	lea	13(%edx, %esi), %edx
	lea	13(%ecx, %esi), %ecx
	movdqu	-16(%ecx), %xmm0
	xor	%esi, %esi
	movdqu	%xmm0, -16(%edx)
	jmp	L(CopyFrom1To16BytesCase3)
3447
/* Count-limited exit for the palignr $4 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit4), which
   advances both pointers by %esi + 12, copies the 12 bytes ending there
   (8 + 4, %eax as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 28(%ecx) needs %ecx + 28 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave4):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit4)
	palignr	$4, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	28(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	palignr	$4, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit4)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit4):
	lea	12(%edx, %esi), %edx
	lea	12(%ecx, %esi), %ecx
	movlpd	-12(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3479
/* Count-limited exit for the palignr $5 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit5), which
   advances both pointers by %esi + 11, copies the 11 bytes ending there
   (8 + 4 overlapping by one, %eax as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 27(%ecx) needs %ecx + 27 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave5):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit5)
	palignr	$5, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	27(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	palignr	$5, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit5)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit5):
	lea	11(%edx, %esi), %edx
	lea	11(%ecx, %esi), %ecx
	movlpd	-11(%ecx), %xmm0
	movl	-4(%ecx), %eax
	movlpd	%xmm0, -11(%edx)
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3511
/* Count-limited exit for the palignr $6 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit6), which
   advances both pointers by %esi + 10, copies the 10 bytes ending there
   (8 + 2, %ax as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 26(%ecx) needs %ecx + 26 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave6):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit6)
	palignr	$6, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	26(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	palignr	$6, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit6)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit6):
	lea	10(%edx, %esi), %edx
	lea	10(%ecx, %esi), %ecx

	movlpd	-10(%ecx), %xmm0
	movw	-2(%ecx), %ax
	movlpd	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3544
/* Count-limited exit for the palignr $7 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit7), which
   advances both pointers by %esi + 9, copies the 9 bytes ending there
   (8 + 1, %ah as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 25(%ecx) needs %ecx + 25 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave7):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit7)
	palignr	$7, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	25(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	palignr	$7, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit7)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit7):
	lea	9(%edx, %esi), %edx
	lea	9(%ecx, %esi), %ecx

	movlpd	-9(%ecx), %xmm0
	movb	-1(%ecx), %ah
	movlpd	%xmm0, -9(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3577
/* Count-limited exit for the palignr $8 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit8), which
   advances both pointers by %esi + 8, copies the 8 bytes ending there,
   clears %esi and continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 24(%ecx) needs %ecx + 24 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave8):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit8)
	palignr	$8, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	24(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	palignr	$8, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit8)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit8):
	lea	8(%edx, %esi), %edx
	lea	8(%ecx, %esi), %ecx
	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3607
/* Count-limited exit for the palignr $9 copy path.  %ebx is re-biased by +48;
   while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit9), which
   advances both pointers by %esi + 7, copies the 8 bytes ending there (one
   more than the advance beyond %esi), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 23(%ecx) needs %ecx + 23 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave9):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit9)
	palignr	$9, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	23(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	palignr	$9, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit9)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit9):
	lea	7(%edx, %esi), %edx
	lea	7(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3638
/* Count-limited exit for the palignr $10 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit10), which
   advances both pointers by %esi + 6, copies the 8 bytes ending there (two
   more than the advance beyond %esi), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 22(%ecx) needs %ecx + 22 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave10):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit10)
	palignr	$10, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	22(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	palignr	$10, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit10)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit10):
	lea	6(%edx, %esi), %edx
	lea	6(%ecx, %esi), %ecx

	movlpd	-8(%ecx), %xmm0
	movlpd	%xmm0, -8(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3669
/* Count-limited exit for the palignr $11 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit11), which
   advances both pointers by %esi + 5, copies the 5 bytes ending there
   (4 + 1, with %esi and %ah as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 21(%ecx) needs %ecx + 21 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave11):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit11)
	palignr	$11, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	21(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	palignr	$11, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit11)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit11):
	lea	5(%edx, %esi), %edx
	lea	5(%ecx, %esi), %ecx
	movl	-5(%ecx), %esi		/* %esi is free: pointers are updated */
	movb	-1(%ecx), %ah
	movl	%esi, -5(%edx)
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3701
/* Count-limited exit for the palignr $12 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit12), which
   advances both pointers by %esi + 4, copies the 4 bytes ending there
   (%eax as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 20(%ecx) needs %ecx + 20 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave12):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit12)
	palignr	$12, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	20(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	palignr	$12, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit12)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit12):
	lea	4(%edx, %esi), %edx
	lea	4(%ecx, %esi), %ecx
	movl	-4(%ecx), %eax
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3731
/* Count-limited exit for the palignr $13 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit13), which
   advances both pointers by %esi + 3, copies the 4 bytes ending there (one
   more than the advance beyond %esi; %eax as scratch), clears %esi and
   continues in L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 19(%ecx) needs %ecx + 19 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave13):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit13)
	palignr	$13, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	19(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	palignr	$13, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit13)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit13):
	lea	3(%edx, %esi), %edx
	lea	3(%ecx, %esi), %ecx

	movl	-4(%ecx), %eax
	movl	%eax, -4(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3762
/* Count-limited exit for the palignr $14 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit14), which
   advances both pointers by %esi + 2, copies the 2 bytes ending there
   (%ax as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 18(%ecx) needs %ecx + 18 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave14):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit14)
	palignr	$14, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	18(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	palignr	$14, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit14)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit14):
	lea	2(%edx, %esi), %edx
	lea	2(%ecx, %esi), %ecx
	movw	-2(%ecx), %ax
	movw	%ax, -2(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3792
/* Count-limited exit for the palignr $15 copy path.  %ebx is re-biased by
   +48; while it stays positive up to four 16-byte chunks are stored at
   0/16/32/48(%edx) (two rebuilt with palignr, then %xmm4 and %xmm5), %esi
   growing by 16 per chunk.  Execution then reaches L(StrncpyExit15), which
   advances both pointers by %esi + 1, copies the single byte ending there
   (%ah as scratch), clears %esi and continues in
   L(CopyFrom1To16BytesCase3).
   NOTE(review): entry contents of %xmm1-%xmm5, %esi and the %ebx bias are
   set outside this block; movaps 17(%ecx) needs %ecx + 17 to be 16-byte
   aligned -- confirm at the callers.  */
L(StrncpyLeave15):
	movaps	%xmm2, %xmm3		/* keep the unshifted chunk for round 2 */
	add	$48, %ebx
	jle	L(StrncpyExit15)
	palignr	$15, %xmm1, %xmm2
	movaps	%xmm2, (%edx)
	movaps	17(%ecx), %xmm2
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	palignr	$15, %xmm3, %xmm2
	movaps	%xmm2, 16(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm4, 32(%edx)
	lea	16(%esi), %esi
	sub	$16, %ebx
	jbe	L(StrncpyExit15)
	movaps	%xmm5, 48(%edx)
	lea	16(%esi), %esi
	lea	-16(%ebx), %ebx
L(StrncpyExit15):
	lea	1(%edx, %esi), %edx
	lea	1(%ecx, %esi), %ecx
	movb	-1(%ecx), %ah
	movb	%ah, -1(%edx)
	xor	%esi, %esi
	jmp	L(CopyFrom1To16BytesCase3)
3822#endif
3823
3824#if !defined USE_AS_STRCAT && ! defined USE_AS_STRLCPY
3825# ifdef USE_AS_STRNCPY
3826	CFI_POP (%esi)
3827	CFI_POP (%edi)
3828
3829	.p2align 4
3830L(ExitTail0):
3831	movl	%edx, %eax
3832	RETURN
3833
/* Short strncpy case.  Counts of 12 or less in %ebx are passed on to
   L(StrncpyExit12Bytes).  Otherwise source bytes 8..11 are tested for zero
   unconditionally and bytes 12 and 13 only after the count has been compared
   with 13 and 14; a hit on either the count or a zero byte leaves through
   the matching L(ExitTailN).  If nothing hits, 15 bytes are copied with two
   overlapping 8-byte moves and the function returns: %edx in %eax, or for
   USE_AS_STPCPY %edx + 14 stepped forward by one unless that byte is zero.
   NOTE(review): the checks start at offset 8 and the count is never compared
   with 15, so the caller presumably guarantees bytes 0..7 are non-zero and
   %ebx <= 15 -- confirm at the jump site.  */
	.p2align 4
L(StrncpyExit15Bytes):
	cmp	$12, %ebx
	jbe	L(StrncpyExit12Bytes)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	cmpb	$0, 11(%ecx)
	jz	L(ExitTail12)
	cmp	$13, %ebx
	je	L(ExitTail13)
	cmpb	$0, 12(%ecx)
	jz	L(ExitTail13)
	cmp	$14, %ebx
	je	L(ExitTail14)
	cmpb	$0, 13(%ecx)
	jz	L(ExitTail14)
	movlpd	(%ecx), %xmm0
	movlpd	7(%ecx), %xmm1
	movlpd	%xmm0, (%edx)
	movlpd	%xmm1, 7(%edx)
#  ifdef USE_AS_STPCPY
	lea	14(%edx), %eax
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  else
	movl	%edx, %eax
#  endif
	RETURN
3866
/* Short strncpy case entered from L(StrncpyExit15Bytes) with %ebx <= 12.
   For N = 9, 10, 11 the count is compared with N and then source byte N - 1
   is tested for zero; either hit leaves through L(ExitTailN).  If nothing
   hits, 12 bytes are copied (8 + 4), SAVE_RESULT_TAIL (11) runs (macro
   defined earlier in the file; presumably loads the return value into %eax)
   and the function returns, with the USE_AS_STPCPY step applied to %eax.
   NOTE(review): no check is made for counts below 9 or for bytes 0..7, so
   the caller presumably guarantees %ebx >= 9 and those bytes non-zero --
   confirm at the jump site.  */
	.p2align 4
L(StrncpyExit12Bytes):
	cmp	$9, %ebx
	je	L(ExitTail9)
	cmpb	$0, 8(%ecx)
	jz	L(ExitTail9)
	cmp	$10, %ebx
	je	L(ExitTail10)
	cmpb	$0, 9(%ecx)
	jz	L(ExitTail10)
	cmp	$11, %ebx
	je	L(ExitTail11)
	cmpb	$0, 10(%ecx)
	jz	L(ExitTail11)
	movlpd	(%ecx), %xmm0
	movl	8(%ecx), %eax
	movlpd	%xmm0, (%edx)
	movl	%eax, 8(%edx)
	SAVE_RESULT_TAIL (11)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
	RETURN
3891
/* Short strncpy case.  Counts of 4 or less in %ebx are passed on to
   L(StrncpyExit4Bytes).  Otherwise source bytes 0..3 are tested for zero
   unconditionally (the count is known to exceed 4), and for N = 5, 6, 7 the
   count is compared with N before byte N - 1 is tested; any hit leaves
   through the matching L(ExitTailN).  If nothing hits, 8 bytes are copied
   and the function returns: %edx in %eax, or for USE_AS_STPCPY %edx + 7
   stepped forward by one unless that byte is zero.
   NOTE(review): the count is never compared with 8, so the caller presumably
   guarantees %ebx <= 8 -- confirm at the jump site.  */
	.p2align 4
L(StrncpyExit8Bytes):
	cmp	$4, %ebx
	jbe	L(StrncpyExit4Bytes)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	cmpb	$0, 3(%ecx)
	jz	L(ExitTail4)

	cmp	$5, %ebx
	je	L(ExitTail5)
	cmpb	$0, 4(%ecx)
	jz	L(ExitTail5)
	cmp	$6, %ebx
	je	L(ExitTail6)
	cmpb	$0, 5(%ecx)
	jz	L(ExitTail6)
	cmp	$7, %ebx
	je	L(ExitTail7)
	cmpb	$0, 6(%ecx)
	jz	L(ExitTail7)
	movlpd	(%ecx), %xmm0
	movlpd	%xmm0, (%edx)
#  ifdef USE_AS_STPCPY
	lea	7(%edx), %eax
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  else
	movl	%edx, %eax
#  endif
	RETURN
3927
/* Short strncpy case entered from L(StrncpyExit8Bytes) with %ebx <= 4.
   A zero count leaves through L(ExitTail0).  For N = 1, 2, 3 the count is
   compared with N and then source byte N - 1 is tested for zero; either hit
   leaves through L(ExitTailN).  If nothing hits, 4 bytes are copied,
   SAVE_RESULT_TAIL (3) runs (macro defined earlier in the file; presumably
   loads the return value into %eax) and the function returns, with the
   USE_AS_STPCPY step applied to %eax.  */
	.p2align 4
L(StrncpyExit4Bytes):
	test	%ebx, %ebx
	jz	L(ExitTail0)
	cmp	$1, %ebx
	je	L(ExitTail1)
	cmpb	$0, (%ecx)
	jz	L(ExitTail1)
	cmp	$2, %ebx
	je	L(ExitTail2)
	cmpb	$0, 1(%ecx)
	jz	L(ExitTail2)
	cmp	$3, %ebx
	je	L(ExitTail3)
	cmpb	$0, 2(%ecx)
	jz	L(ExitTail3)
	movl	(%ecx), %eax
	movl	%eax, (%edx)
	SAVE_RESULT_TAIL (3)
#  ifdef USE_AS_STPCPY
	/* %eax += 1 unless (%eax) is a zero byte: cmpb sets CF only for a
	   zero byte and sbb $-1 adds 1 - CF.  */
	cmpb	$1, (%eax)
	sbb	$-1, %eax
#  endif
	RETURN
3952# endif
3953
3954END (STRCPY)
3955#endif
3956