/*
Copyright (c) 2010, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

    * Redistributions of source code must retain the above copyright notice,
    * this list of conditions and the following disclaimer.

    * Redistributions in binary form must reproduce the above copyright notice,
    * this list of conditions and the following disclaimer in the documentation
    * and/or other materials provided with the distribution.

    * Neither the name of Intel Corporation nor the names of its contributors
    * may be used to endorse or promote products derived from this software
    * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/*
 * SSE2 memset for 32-bit x86 (GNU as, AT&T syntax, preprocessed with cpp).
 *
 * Entry points:
 *   memset_atom        - fills LEN bytes at DST with the low byte of CHR and
 *                        returns DST in EAX.
 *   __memset_chk_atom  - same, but first compares LEN against a fourth stack
 *                        argument (CHK_DST_LEN) and diverts to
 *                        __memset_chk_fail when LEN is larger.
 *
 * All arguments are read from the stack (see DST/CHR/LEN/CHK_DST_LEN below).
 *
 * Register roles inside memset_atom:
 *   EAX   fill byte replicated into all four bytes (briefly reused as scratch
 *         and then reloaded from XMM0)
 *   ECX   number of bytes still to be written
 *   EDX   destination cursor
 *   EBX   scratch: jump-table address, or byte counter in the large-size
 *         path; saved on entry and restored before returning
 *   XMM0  fill byte replicated into all sixteen bytes
 *
 * Tail handling: BRANCH_TO_JMPTBL_ENTRY advances EDX by ECX so that EDX points
 * one past the end of the region, then jumps to a label that writes the last
 * ECX bytes using negative offsets from EDX.
 */

#include <private/bionic_asm.h>

/* NOTE(review): presumably selects the Atom cache-size constants in cache.h. */
#define FOR_ATOM
#include "cache.h"

#ifndef L
# define L(label)	.L##label
#endif

#ifndef ALIGN
# define ALIGN(n)	.p2align n
#endif

/* Unwind bookkeeping that mirrors a 4-byte push of REG. */
#define CFI_PUSH(REG) \
	.cfi_adjust_cfa_offset 4; \
	.cfi_rel_offset REG, 0

/* Unwind bookkeeping that mirrors a 4-byte pop of REG. */
#define CFI_POP(REG) \
	.cfi_adjust_cfa_offset -4; \
	.cfi_restore REG

#define PUSH(REG)	pushl REG; CFI_PUSH(REG)
#define POP(REG)	popl REG; CFI_POP(REG)

/* Stack offsets of the arguments once EBX has been pushed
   (4 bytes return address + 4 bytes saved EBX).  */
#define PARMS		8		/* Preserve EBX.  */
#define DST		PARMS
#define CHR		(DST+4)
#define LEN		(CHR+4)
#define CHK_DST_LEN	(LEN+4)
/* The return value is the original destination pointer.  */
#define SETRTNVAL	movl DST(%esp), %eax

#define ENTRANCE	PUSH(%ebx);
#define RETURN_END	POP(%ebx); ret
/* RETURN is used in the middle of the function: after the ret, the unwind
   state is put back to "EBX pushed" for the code that follows.  */
#define RETURN		RETURN_END; CFI_PUSH(%ebx)
#define JMPTBL(I, B)	I - B

/* Load an entry in a jump table into EBX and branch to it.  TABLE is a
   jump table with relative offsets.  ECX is the table index (the number of
   tail bytes); EDX is advanced by ECX before the jump.  Clobbers EBX.  */
# define BRANCH_TO_JMPTBL_ENTRY(TABLE)				\
    /* We first load PC into EBX.  */				\
    call	__x86.get_pc_thunk.bx;				\
    /* Get the address of the jump table.  */			\
    add		$(TABLE - .), %ebx;				\
    /* Get the entry and convert the relative offset to the	\
       absolute address.  */					\
    add		(%ebx,%ecx,4), %ebx;				\
    add		%ecx, %edx;					\
    /* We loaded the jump table and adjusted EDX. Go.  */	\
    jmp		*%ebx

/*
 * __memset_chk_atom: length-checked entry point.
 * Reads LEN and CHK_DST_LEN from the stack.  When LEN <= CHK_DST_LEN
 * (unsigned) it continues inside memset_atom with ECX = LEN; otherwise it
 * undoes the EBX push and jumps to __memset_chk_fail.
 */
ENTRY(__memset_chk_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
	cmpl	CHK_DST_LEN(%esp), %ecx
	jna	L(memset_length_loaded)		/* Unsigned LEN <= CHK_DST_LEN.  */

	POP(%ebx)		/* Undo ENTRANCE without returning.  */
	jmp	__memset_chk_fail
END(__memset_chk_atom)

/*
 * memset_atom: fill LEN bytes at DST with the low byte of CHR.
 * Returns DST in EAX.  Uses EAX, ECX, EDX, XMM0 and the flags; EBX is saved
 * on entry and restored on every return path.
 */
	.section .text.sse2,"ax",@progbits
	ALIGN(4)
ENTRY(memset_atom)
	ENTRANCE

	movl	LEN(%esp), %ecx
/* __memset_chk_atom joins here with ECX = LEN and EBX already pushed.  */
L(memset_length_loaded):
	movzbl	CHR(%esp), %eax
	movb	%al, %ah
	/* Fill the whole EAX with pattern.  */
	movl	%eax, %edx
	shl	$16, %eax
	or	%edx, %eax
	movl	DST(%esp), %edx
	cmp	$32, %ecx
	jae	L(32bytesormore)

/* ECX < 32: dispatch straight to the scalar tail that writes ECX bytes.  */
L(write_less32bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_less_32bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_less_32bytes):
	.int	JMPTBL(L(write_0bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_1bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_2bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_3bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_4bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_5bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_6bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_7bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_8bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_9bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_10bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_11bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_12bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_13bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_14bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_15bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_16bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_17bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_18bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_19bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_20bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_21bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_22bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_23bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_24bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_25bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_26bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_27bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_28bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_29bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_30bytes), L(table_less_32bytes))
	.int	JMPTBL(L(write_31bytes), L(table_less_32bytes))
	.popsection

/* Scalar tails.  On entry EDX points one past the end of the region; each
   label writes exactly the number of bytes in its name by falling through
   4-byte stores and finishing with a 0-3 byte remainder.  */
	ALIGN(4)
L(write_28bytes):
	movl	%eax, -28(%edx)
L(write_24bytes):
	movl	%eax, -24(%edx)
L(write_20bytes):
	movl	%eax, -20(%edx)
L(write_16bytes):
	movl	%eax, -16(%edx)
L(write_12bytes):
	movl	%eax, -12(%edx)
L(write_8bytes):
	movl	%eax, -8(%edx)
L(write_4bytes):
	movl	%eax, -4(%edx)
L(write_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_29bytes):
	movl	%eax, -29(%edx)
L(write_25bytes):
	movl	%eax, -25(%edx)
L(write_21bytes):
	movl	%eax, -21(%edx)
L(write_17bytes):
	movl	%eax, -17(%edx)
L(write_13bytes):
	movl	%eax, -13(%edx)
L(write_9bytes):
	movl	%eax, -9(%edx)
L(write_5bytes):
	movl	%eax, -5(%edx)
L(write_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_30bytes):
	movl	%eax, -30(%edx)
L(write_26bytes):
	movl	%eax, -26(%edx)
L(write_22bytes):
	movl	%eax, -22(%edx)
L(write_18bytes):
	movl	%eax, -18(%edx)
L(write_14bytes):
	movl	%eax, -14(%edx)
L(write_10bytes):
	movl	%eax, -10(%edx)
L(write_6bytes):
	movl	%eax, -6(%edx)
L(write_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(write_31bytes):
	movl	%eax, -31(%edx)
L(write_27bytes):
	movl	%eax, -27(%edx)
L(write_23bytes):
	movl	%eax, -23(%edx)
L(write_19bytes):
	movl	%eax, -19(%edx)
L(write_15bytes):
	movl	%eax, -15(%edx)
L(write_11bytes):
	movl	%eax, -11(%edx)
L(write_7bytes):
	movl	%eax, -7(%edx)
L(write_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
/* ECX >= 32.  The alignment of EDX is not yet known; it is tested below.  */
L(32bytesormore):
	/* Fill xmm0 with the pattern.  */
	movd	%eax, %xmm0
	pshufd	$0, %xmm0, %xmm0
	testl	$0xf, %edx
	jz	L(aligned_16)
/* ECX >= 32 and EDX is not 16 byte aligned.  */
L(not_aligned_16):
	/* Cover the misaligned head with one unaligned 16-byte store, then
	   round EDX up to the next 16-byte boundary.  */
	movdqu	%xmm0, (%edx)
	movl	%edx, %eax
	and	$-16, %edx
	add	$16, %edx
	/* EAX = old EDX - new EDX, i.e. minus the 1..15 bytes skipped; adding
	   it to ECX removes those already-written bytes from the count.  */
	sub	%edx, %eax
	add	%eax, %ecx
	/* EAX was used as scratch: reload the 4-byte pattern.  */
	movd	%xmm0, %eax

	ALIGN(4)
/* EDX is 16 byte aligned from here on.  */
L(aligned_16):
	cmp	$128, %ecx
	jae	L(128bytesormore)

L(aligned_16_less128bytes):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
/* ECX >= 128: pick a store loop by comparing ECX against the cache-size
   constants (SHARED_CACHE_SIZE and DATA_CACHE_SIZE are not defined in this
   file; presumably they come from cache.h).  */
L(128bytesormore):
	/* EBX is pushed a second time so it can hold SHARED_CACHE_SIZE; the
	   non-temporal path below keeps it pushed until its final POP.  */
	PUSH(%ebx)
	mov	$SHARED_CACHE_SIZE, %ebx
	cmp	%ebx, %ecx
	jae	L(128bytesormore_nt_start)


	POP(%ebx)
/* The assembler's unwind state is now "second EBX popped", but
   L(128bytesormore_nt_start) is reached by the jump above, taken before the
   POP.  RESTORE_EBX_STATE re-describes the pushed state at that label.  */
# define RESTORE_EBX_STATE CFI_PUSH(%ebx)
	cmp	$DATA_CACHE_SIZE, %ecx

	jae	L(128bytes_L2_normal)
	/* Pre-bias ECX by one block so that the borrow from the "sub" at the
	   top of each loop half tells whether another full 128-byte block
	   remains after the one being stored.  The bias is removed again at
	   L(128bytesless_normal).  */
	subl	$128, %ecx
L(128bytesormore_normal):
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	/* lea and the SSE stores leave the flags from "sub" intact.  */
	lea	128(%edx), %edx
	jb	L(128bytesless_normal)


	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	lea	128(%edx), %edx
	jae	L(128bytesormore_normal)

L(128bytesless_normal):
	/* Undo the pre-bias: ECX is now the true remainder (< 128).  */
	add	$128, %ecx
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	ALIGN(4)
/* DATA_CACHE_SIZE <= ECX < SHARED_CACHE_SIZE: 128 bytes per iteration with
   prefetches ahead of the store cursor.  */
L(128bytes_L2_normal):
	prefetcht0	0x380(%edx)
	prefetcht0	0x3c0(%edx)
	sub	$128, %ecx
	movdqa	%xmm0, (%edx)
	movaps	%xmm0, 0x10(%edx)
	movaps	%xmm0, 0x20(%edx)
	movaps	%xmm0, 0x30(%edx)
	movaps	%xmm0, 0x40(%edx)
	movaps	%xmm0, 0x50(%edx)
	movaps	%xmm0, 0x60(%edx)
	movaps	%xmm0, 0x70(%edx)
	add	$128, %edx
	cmp	$128, %ecx
	jae	L(128bytes_L2_normal)

L(128bytesless_L2_normal):
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))

	RESTORE_EBX_STATE
/* ECX >= SHARED_CACHE_SIZE, EBX = SHARED_CACHE_SIZE (second copy of the
   caller's EBX still on the stack).  The first SHARED_CACHE_SIZE bytes,
   rounded down to a multiple of 128, are written with ordinary stores; the
   rest goes through non-temporal stores.  */
L(128bytesormore_nt_start):
	sub	%ebx, %ecx
	/* The cached loop only writes whole 128-byte blocks, so give the
	   sub-block remainder of SHARED_CACHE_SIZE back to ECX.  */
	mov	%ebx, %eax
	and	$0x7f, %eax
	add	%eax, %ecx
	/* EAX was used as scratch: reload the 4-byte pattern.  */
	movd	%xmm0, %eax
	ALIGN(4)
/* EBX = bytes left to write with ordinary stores.  */
L(128bytesormore_shared_cache_loop):
	prefetcht0	0x3c0(%edx)
	prefetcht0	0x380(%edx)
	sub	$0x80, %ebx
	movdqa	%xmm0, (%edx)
	movdqa	%xmm0, 0x10(%edx)
	movdqa	%xmm0, 0x20(%edx)
	movdqa	%xmm0, 0x30(%edx)
	movdqa	%xmm0, 0x40(%edx)
	movdqa	%xmm0, 0x50(%edx)
	movdqa	%xmm0, 0x60(%edx)
	movdqa	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ebx
	jae	L(128bytesormore_shared_cache_loop)
	cmp	$0x80, %ecx
	jb	L(shared_cache_loop_end)
	ALIGN(4)
/* ECX >= 128: non-temporal stores, 128 bytes per iteration.  */
L(128bytesormore_nt):
	sub	$0x80, %ecx
	movntdq	%xmm0, (%edx)
	movntdq	%xmm0, 0x10(%edx)
	movntdq	%xmm0, 0x20(%edx)
	movntdq	%xmm0, 0x30(%edx)
	movntdq	%xmm0, 0x40(%edx)
	movntdq	%xmm0, 0x50(%edx)
	movntdq	%xmm0, 0x60(%edx)
	movntdq	%xmm0, 0x70(%edx)
	add	$0x80, %edx
	cmp	$0x80, %ecx
	jae	L(128bytesormore_nt)
	/* Order the non-temporal stores before the ordinary tail stores.  */
	sfence
L(shared_cache_loop_end):
	/* Drop the second EBX copy pushed at L(128bytesormore).  */
	POP(%ebx)
	BRANCH_TO_JMPTBL_ENTRY(L(table_16_128bytes))


	.pushsection .rodata.sse2,"a",@progbits
	ALIGN(2)
L(table_16_128bytes):
	.int	JMPTBL(L(aligned_16_0bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_1bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_2bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_3bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_4bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_5bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_6bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_7bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_8bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_9bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_10bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_11bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_12bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_13bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_14bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_15bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_16bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_17bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_18bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_19bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_20bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_21bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_22bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_23bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_24bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_25bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_26bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_27bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_28bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_29bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_30bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_31bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_32bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_33bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_34bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_35bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_36bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_37bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_38bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_39bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_40bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_41bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_42bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_43bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_44bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_45bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_46bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_47bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_48bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_49bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_50bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_51bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_52bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_53bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_54bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_55bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_56bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_57bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_58bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_59bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_60bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_61bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_62bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_63bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_64bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_65bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_66bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_67bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_68bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_69bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_70bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_71bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_72bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_73bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_74bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_75bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_76bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_77bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_78bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_79bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_80bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_81bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_82bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_83bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_84bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_85bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_86bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_87bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_88bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_89bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_90bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_91bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_92bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_93bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_94bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_95bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_96bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_97bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_98bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_99bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_100bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_101bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_102bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_103bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_104bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_105bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_106bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_107bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_108bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_109bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_110bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_111bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_112bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_113bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_114bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_115bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_116bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_117bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_118bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_119bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_120bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_121bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_122bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_123bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_124bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_125bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_126bytes), L(table_16_128bytes))
	.int	JMPTBL(L(aligned_16_127bytes), L(table_16_128bytes))
	.popsection

/* Aligned tails for 0..127 remaining bytes.  On entry EDX points one past
   the end of the region and (EDX - N) is 16 byte aligned for the label
   L(aligned_16_Nbytes), so every movdqa below hits an aligned address.
   Each chain falls through 16-byte stores and finishes with the 0..15 byte
   remainder written from XMM0 (movq) and EAX.  */
	ALIGN(4)
L(aligned_16_112bytes):
	movdqa	%xmm0, -112(%edx)
L(aligned_16_96bytes):
	movdqa	%xmm0, -96(%edx)
L(aligned_16_80bytes):
	movdqa	%xmm0, -80(%edx)
L(aligned_16_64bytes):
	movdqa	%xmm0, -64(%edx)
L(aligned_16_48bytes):
	movdqa	%xmm0, -48(%edx)
L(aligned_16_32bytes):
	movdqa	%xmm0, -32(%edx)
L(aligned_16_16bytes):
	movdqa	%xmm0, -16(%edx)
L(aligned_16_0bytes):
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_113bytes):
	movdqa	%xmm0, -113(%edx)
L(aligned_16_97bytes):
	movdqa	%xmm0, -97(%edx)
L(aligned_16_81bytes):
	movdqa	%xmm0, -81(%edx)
L(aligned_16_65bytes):
	movdqa	%xmm0, -65(%edx)
L(aligned_16_49bytes):
	movdqa	%xmm0, -49(%edx)
L(aligned_16_33bytes):
	movdqa	%xmm0, -33(%edx)
L(aligned_16_17bytes):
	movdqa	%xmm0, -17(%edx)
L(aligned_16_1bytes):
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_114bytes):
	movdqa	%xmm0, -114(%edx)
L(aligned_16_98bytes):
	movdqa	%xmm0, -98(%edx)
L(aligned_16_82bytes):
	movdqa	%xmm0, -82(%edx)
L(aligned_16_66bytes):
	movdqa	%xmm0, -66(%edx)
L(aligned_16_50bytes):
	movdqa	%xmm0, -50(%edx)
L(aligned_16_34bytes):
	movdqa	%xmm0, -34(%edx)
L(aligned_16_18bytes):
	movdqa	%xmm0, -18(%edx)
L(aligned_16_2bytes):
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_115bytes):
	movdqa	%xmm0, -115(%edx)
L(aligned_16_99bytes):
	movdqa	%xmm0, -99(%edx)
L(aligned_16_83bytes):
	movdqa	%xmm0, -83(%edx)
L(aligned_16_67bytes):
	movdqa	%xmm0, -67(%edx)
L(aligned_16_51bytes):
	movdqa	%xmm0, -51(%edx)
L(aligned_16_35bytes):
	movdqa	%xmm0, -35(%edx)
L(aligned_16_19bytes):
	movdqa	%xmm0, -19(%edx)
L(aligned_16_3bytes):
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_116bytes):
	movdqa	%xmm0, -116(%edx)
L(aligned_16_100bytes):
	movdqa	%xmm0, -100(%edx)
L(aligned_16_84bytes):
	movdqa	%xmm0, -84(%edx)
L(aligned_16_68bytes):
	movdqa	%xmm0, -68(%edx)
L(aligned_16_52bytes):
	movdqa	%xmm0, -52(%edx)
L(aligned_16_36bytes):
	movdqa	%xmm0, -36(%edx)
L(aligned_16_20bytes):
	movdqa	%xmm0, -20(%edx)
L(aligned_16_4bytes):
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_117bytes):
	movdqa	%xmm0, -117(%edx)
L(aligned_16_101bytes):
	movdqa	%xmm0, -101(%edx)
L(aligned_16_85bytes):
	movdqa	%xmm0, -85(%edx)
L(aligned_16_69bytes):
	movdqa	%xmm0, -69(%edx)
L(aligned_16_53bytes):
	movdqa	%xmm0, -53(%edx)
L(aligned_16_37bytes):
	movdqa	%xmm0, -37(%edx)
L(aligned_16_21bytes):
	movdqa	%xmm0, -21(%edx)
L(aligned_16_5bytes):
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_118bytes):
	movdqa	%xmm0, -118(%edx)
L(aligned_16_102bytes):
	movdqa	%xmm0, -102(%edx)
L(aligned_16_86bytes):
	movdqa	%xmm0, -86(%edx)
L(aligned_16_70bytes):
	movdqa	%xmm0, -70(%edx)
L(aligned_16_54bytes):
	movdqa	%xmm0, -54(%edx)
L(aligned_16_38bytes):
	movdqa	%xmm0, -38(%edx)
L(aligned_16_22bytes):
	movdqa	%xmm0, -22(%edx)
L(aligned_16_6bytes):
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_119bytes):
	movdqa	%xmm0, -119(%edx)
L(aligned_16_103bytes):
	movdqa	%xmm0, -103(%edx)
L(aligned_16_87bytes):
	movdqa	%xmm0, -87(%edx)
L(aligned_16_71bytes):
	movdqa	%xmm0, -71(%edx)
L(aligned_16_55bytes):
	movdqa	%xmm0, -55(%edx)
L(aligned_16_39bytes):
	movdqa	%xmm0, -39(%edx)
L(aligned_16_23bytes):
	movdqa	%xmm0, -23(%edx)
L(aligned_16_7bytes):
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_120bytes):
	movdqa	%xmm0, -120(%edx)
L(aligned_16_104bytes):
	movdqa	%xmm0, -104(%edx)
L(aligned_16_88bytes):
	movdqa	%xmm0, -88(%edx)
L(aligned_16_72bytes):
	movdqa	%xmm0, -72(%edx)
L(aligned_16_56bytes):
	movdqa	%xmm0, -56(%edx)
L(aligned_16_40bytes):
	movdqa	%xmm0, -40(%edx)
L(aligned_16_24bytes):
	movdqa	%xmm0, -24(%edx)
L(aligned_16_8bytes):
	movq	%xmm0, -8(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_121bytes):
	movdqa	%xmm0, -121(%edx)
L(aligned_16_105bytes):
	movdqa	%xmm0, -105(%edx)
L(aligned_16_89bytes):
	movdqa	%xmm0, -89(%edx)
L(aligned_16_73bytes):
	movdqa	%xmm0, -73(%edx)
L(aligned_16_57bytes):
	movdqa	%xmm0, -57(%edx)
L(aligned_16_41bytes):
	movdqa	%xmm0, -41(%edx)
L(aligned_16_25bytes):
	movdqa	%xmm0, -25(%edx)
L(aligned_16_9bytes):
	movq	%xmm0, -9(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_122bytes):
	movdqa	%xmm0, -122(%edx)
L(aligned_16_106bytes):
	movdqa	%xmm0, -106(%edx)
L(aligned_16_90bytes):
	movdqa	%xmm0, -90(%edx)
L(aligned_16_74bytes):
	movdqa	%xmm0, -74(%edx)
L(aligned_16_58bytes):
	movdqa	%xmm0, -58(%edx)
L(aligned_16_42bytes):
	movdqa	%xmm0, -42(%edx)
L(aligned_16_26bytes):
	movdqa	%xmm0, -26(%edx)
L(aligned_16_10bytes):
	movq	%xmm0, -10(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_123bytes):
	movdqa	%xmm0, -123(%edx)
L(aligned_16_107bytes):
	movdqa	%xmm0, -107(%edx)
L(aligned_16_91bytes):
	movdqa	%xmm0, -91(%edx)
L(aligned_16_75bytes):
	movdqa	%xmm0, -75(%edx)
L(aligned_16_59bytes):
	movdqa	%xmm0, -59(%edx)
L(aligned_16_43bytes):
	movdqa	%xmm0, -43(%edx)
L(aligned_16_27bytes):
	movdqa	%xmm0, -27(%edx)
L(aligned_16_11bytes):
	movq	%xmm0, -11(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_124bytes):
	movdqa	%xmm0, -124(%edx)
L(aligned_16_108bytes):
	movdqa	%xmm0, -108(%edx)
L(aligned_16_92bytes):
	movdqa	%xmm0, -92(%edx)
L(aligned_16_76bytes):
	movdqa	%xmm0, -76(%edx)
L(aligned_16_60bytes):
	movdqa	%xmm0, -60(%edx)
L(aligned_16_44bytes):
	movdqa	%xmm0, -44(%edx)
L(aligned_16_28bytes):
	movdqa	%xmm0, -28(%edx)
L(aligned_16_12bytes):
	movq	%xmm0, -12(%edx)
	movl	%eax, -4(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_125bytes):
	movdqa	%xmm0, -125(%edx)
L(aligned_16_109bytes):
	movdqa	%xmm0, -109(%edx)
L(aligned_16_93bytes):
	movdqa	%xmm0, -93(%edx)
L(aligned_16_77bytes):
	movdqa	%xmm0, -77(%edx)
L(aligned_16_61bytes):
	movdqa	%xmm0, -61(%edx)
L(aligned_16_45bytes):
	movdqa	%xmm0, -45(%edx)
L(aligned_16_29bytes):
	movdqa	%xmm0, -29(%edx)
L(aligned_16_13bytes):
	movq	%xmm0, -13(%edx)
	movl	%eax, -5(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_126bytes):
	movdqa	%xmm0, -126(%edx)
L(aligned_16_110bytes):
	movdqa	%xmm0, -110(%edx)
L(aligned_16_94bytes):
	movdqa	%xmm0, -94(%edx)
L(aligned_16_78bytes):
	movdqa	%xmm0, -78(%edx)
L(aligned_16_62bytes):
	movdqa	%xmm0, -62(%edx)
L(aligned_16_46bytes):
	movdqa	%xmm0, -46(%edx)
L(aligned_16_30bytes):
	movdqa	%xmm0, -30(%edx)
L(aligned_16_14bytes):
	movq	%xmm0, -14(%edx)
	movl	%eax, -6(%edx)
	movw	%ax, -2(%edx)
	SETRTNVAL
	RETURN

	ALIGN(4)
L(aligned_16_127bytes):
	movdqa	%xmm0, -127(%edx)
L(aligned_16_111bytes):
	movdqa	%xmm0, -111(%edx)
L(aligned_16_95bytes):
	movdqa	%xmm0, -95(%edx)
L(aligned_16_79bytes):
	movdqa	%xmm0, -79(%edx)
L(aligned_16_63bytes):
	movdqa	%xmm0, -63(%edx)
L(aligned_16_47bytes):
	movdqa	%xmm0, -47(%edx)
L(aligned_16_31bytes):
	movdqa	%xmm0, -31(%edx)
L(aligned_16_15bytes):
	movq	%xmm0, -15(%edx)
	movl	%eax, -7(%edx)
	movw	%ax, -3(%edx)
	movb	%al, -1(%edx)
	SETRTNVAL
	/* Last return in the function: no unwind state to re-establish.  */
	RETURN_END

END(memset_atom)