1/*
2Copyright (c) 2010, Intel Corporation
3All rights reserved.
4
5Redistribution and use in source and binary forms, with or without
6modification, are permitted provided that the following conditions are met:
7
8    * Redistributions of source code must retain the above copyright notice,
9    * this list of conditions and the following disclaimer.
10
11    * Redistributions in binary form must reproduce the above copyright notice,
12    * this list of conditions and the following disclaimer in the documentation
13    * and/or other materials provided with the distribution.
14
15    * Neither the name of Intel Corporation nor the names of its contributors
16    * may be used to endorse or promote products derived from this software
17    * without specific prior written permission.
18
19THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
20ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
21WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
22DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
23ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
24(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
25LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
26ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
28SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29*/
30
31#include <private/bionic_asm.h>
32
33#include "cache.h"
34
/* L(name) spells an assembler-local label (.L prefix).  */
#ifndef L
# define L(label)	.L##label
#endif

/* ALIGN(n) pads to a 2^n byte boundary.  */
#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind annotation for a 4-byte push of REG: the CFA moves by 4 and REG
   is recorded as saved at the new top of stack.  */
#define CFI_PUSH(REG)						\
  .cfi_adjust_cfa_offset 4;					\
  .cfi_rel_offset REG, 0

/* Unwind annotation for a 4-byte pop of REG: the CFA moves back by 4 and
   REG is marked as restored.  */
#define CFI_POP(REG)						\
  .cfi_adjust_cfa_offset -4;					\
  .cfi_restore REG

/* Push/pop REG together with the matching unwind annotation.  */
#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments, valid once ENTRANCE has pushed EBX:
   the 8 bytes skipped are the return address and the saved EBX.  */
#define PARMS 8  /* Preserve EBX. */
#define DST PARMS
#define CHR (DST+4)
#define LEN (CHR+4)
/* Extra argument read only by __memset_chk_atom: upper bound for LEN.  */
#define CHK_DST_LEN (LEN+4)
/* Load the DST argument into EAX just before returning.  */
#define SETRTNVAL	movl DST(%esp), %eax

/* ENTRANCE saves EBX; RETURN_END restores it and returns.  RETURN is for
   exits that are followed by more code: after the ret it re-issues
   CFI_PUSH so the following instructions are again described as running
   with EBX pushed.  */
#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
#define RETURN		RETURN_END; CFI_PUSH(%ebx)
/* A jump-table entry: offset of label I from table base B.  */
#define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  The entry index is taken from ECX,
   and ECX is also added to EDX before the jump, so EBX is clobbered and
   EDX is advanced by the index.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx
79
/* __memset_chk_atom: length-checked entry point.
   Reads LEN and the CHK_DST_LEN stack argument.  When LEN (unsigned) does
   not exceed CHK_DST_LEN, control continues inside memset_atom at
   L(memset_length_loaded) with LEN already in ECX and EBX already pushed.
   Otherwise the push done by ENTRANCE is undone and control is transferred
   to __memset_chk_fail.  */
ENTRY(__memset_chk_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	/* LEN <= CHK_DST_LEN (unsigned): take the normal fill path.  */
	jbe	L(memset_length_loaded)

	/* LEN is too large: undo ENTRANCE without returning.  */
	POP(%ebx)
	jmp	__memset_chk_fail
END(__memset_chk_atom)
90
91	.section .text.sse2,"ax",@progbits
92	ALIGN(4)
/* memset_atom: fill LEN bytes at DST with the low byte of CHR.
   All three arguments are read from the stack (DST, CHR, LEN offsets).
   Register roles after the setup below:
     EAX = fill byte replicated into all four bytes
     ECX = number of bytes to write
     EDX = destination pointer
   EBX is saved by ENTRANCE and restored by RETURN / RETURN_END.  */
ENTRY(memset_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
/* __memset_chk_atom joins here with ECX = LEN and EBX already pushed.  */
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	/* EDX was scratch above; now load the destination pointer.  */
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch on the byte count to the matching write_N chain.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))
110
111
112	.pushsection .rodata.sse2,"a",@progbits
113	ALIGN(2)
/* Jump table for byte counts 0..31.  Entry N holds the offset of
   L(write_Nbytes) relative to the start of this table; it is indexed by
   ECX in BRANCH_TO_JMPTBL_ENTRY.  */
L(table_less_32bytes):
	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
147	.popsection
148
149	ALIGN(4)
/* Store chains for byte counts 0..31.  They are reached through
   L(table_less_32bytes); the dispatch macro has already added the count
   to EDX, so EDX points one past the last byte and every store uses a
   negative offset.  Control enters at the label for the exact count and
   falls through the remaining 4-byte stores.  There is one chain per
   value of (count mod 4).  */

/* Counts that are a multiple of 4: 4-byte stores only.  */
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
/* Counts of the form 4k+1: 4-byte stores, then one trailing byte.  */
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* Counts of the form 4k+2: 4-byte stores, then one trailing word.  */
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* Counts of the form 4k+3: 4-byte stores, then a word and a byte.  */
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN
228
229	ALIGN(4)
/* ECX >= 32.  EDX is the destination; its alignment has not been checked
   yet.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the unaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary and reduce ECX by the
	   number of bytes skipped (EAX = old EDX - new EDX, a negative
	   value).  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern for the tail stores.  */
	movd	%xmm0, %eax

	ALIGN(4)
/* EDX is 16 byte aligned from here on; ECX = bytes left.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
L(128bytesormore):
	/* EBX is borrowed to hold SHARED_CACHE_SIZE.  This is a second push
	   on top of the one done by ENTRANCE.  */
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	POP(%ebx)
	/* POP's .cfi_restore marks EBX as live in the register, but the
	   caller's EBX is still saved by ENTRANCE at 0(%esp) and the
	   jump-table dispatch below overwrites the register.  Point the
	   unwind rule back at the saved slot.  */
	.cfi_rel_offset %ebx, 0
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx

	jae	L(128bytes_L2_normal)
	/* ECX < DATA_CACHE_SIZE.  ECX is biased by -128 so that the borrow
	   from each "sub $128" below signals fewer than 128 bytes remain
	   after the current chunk.  */
	subl	$128, %ecx
/* Loop unrolled twice; the movdqa/lea instructions between each sub and
   its branch leave the flags untouched.  */
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the bias: ECX = bytes still to write (0..127).  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
/* DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE on entry: 128 bytes per
   iteration with prefetcht0 issued ahead of the stores.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	/* The code below is reached only by the jump taken before the POP
	   above, i.e. with the second copy of EBX still on the stack.  */
	RESTORE_EBX_STATE
/* ECX >= SHARED_CACHE_SIZE and EBX = SHARED_CACHE_SIZE.  EBX becomes the
   byte budget for the regular-store loop; ECX becomes what is left for
   the non-temporal loop plus the part of EBX (EBX mod 128) that the first
   loop cannot consume.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch; reload the pattern for the tail stores.  */
	movd	%xmm0, %eax
	ALIGN(4)
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
/* Remaining full 128-byte chunks are written with non-temporal stores.  */
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Fence after the movntdq sequence, before the ordinary tail stores.  */
	sfence
L(shared_cache_loop_end):
	POP(%ebx)
	/* As above: the caller's EBX is still saved at 0(%esp) by ENTRANCE
	   and the dispatch below clobbers the register, so keep the unwind
	   rule pointing at the saved slot rather than at the register.  */
	.cfi_rel_offset %ebx, 0
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))
362
363
364	.pushsection .rodata.sse2,"a",@progbits
365	ALIGN(2)
/* Jump table for the 16-byte-aligned path, byte counts 0..127.  Entry N
   holds the offset of L(aligned_16_Nbytes) relative to the start of this
   table; it is indexed by ECX in BRANCH_TO_JMPTBL_ENTRY.  */
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
495	.popsection
496
497	ALIGN(4)
/* Store chains for the 16-byte-aligned path, byte counts 0..127.  They
   are reached through L(table_16_128bytes); the dispatch macro has
   already added the count to EDX, so EDX points one past the last byte
   and every store uses a negative offset.  Control enters at the label
   for the exact count and falls through: first 16-byte movdqa stores
   (EDX minus the entry count is the aligned pointer the dispatcher
   started from, and each further store is 16 bytes beyond that), then the
   remaining (count mod 16) bytes with movq/movl/movw/movb from XMM0/EAX.
   There is one chain per value of (count mod 16).  */

/* count mod 16 == 0 */
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 1 */
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 2 */
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 3 */
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 4 */
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 5 */
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 6 */
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 7 */
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 8 */
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 9 */
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 10 */
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 11 */
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 12 */
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 13 */
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 14 */
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* count mod 16 == 15 */
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last exit of the function: no code follows, so RETURN_END is used
	   and the CFI state is not re-pushed.  */
	RETURN_END

END(memset_atom)
834